Merging PR_218 openai_rev package with new streamlit chat app

This commit is contained in:
noptuno
2023-04-27 20:29:30 -04:00
parent 479b8d6d10
commit 355dee533b
8378 changed files with 2931636 additions and 3 deletions


@@ -0,0 +1,33 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/result.h"
namespace arrow {
template <typename InputIterator, typename OutputIterator, typename UnaryOperation>
Status MaybeTransform(InputIterator first, InputIterator last, OutputIterator out,
UnaryOperation unary_op) {
for (; first != last; ++first, (void)++out) {
ARROW_ASSIGN_OR_RAISE(*out, unary_op(*first));
}
return Status::OK();
}
} // namespace arrow
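
A minimal usage sketch for MaybeTransform, assuming the header above is included. ParseInt and ParseAll are hypothetical helpers; the point is that ARROW_ASSIGN_OR_RAISE stops the loop at the first failing element and propagates its Status.

#include <stdexcept>
#include <string>
#include <vector>
#include "arrow/result.h"
#include "arrow/status.h"

// Hypothetical element-wise operation returning Result<int>.
arrow::Result<int> ParseInt(const std::string& s) {
  try {
    return std::stoi(s);
  } catch (const std::exception&) {
    return arrow::Status::Invalid("not an integer: ", s);
  }
}

arrow::Status ParseAll(const std::vector<std::string>& in, std::vector<int>* out) {
  out->resize(in.size());
  // Stops at the first failing element and returns its Status.
  return arrow::MaybeTransform(in.begin(), in.end(), out->begin(), ParseInt);
}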


@@ -0,0 +1,68 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include "arrow/util/bit_util.h"
namespace arrow {
namespace internal {
struct BitmapWordAlignParams {
int64_t leading_bits;
int64_t trailing_bits;
int64_t trailing_bit_offset;
const uint8_t* aligned_start;
int64_t aligned_bits;
int64_t aligned_words;
};
// Compute parameters for accessing a bitmap using aligned word instructions.
// The returned parameters describe:
// - a leading area of size `leading_bits` before the aligned words
// - a word-aligned area of size `aligned_bits`
// - a trailing area of size `trailing_bits` after the aligned words
template <uint64_t ALIGN_IN_BYTES>
inline BitmapWordAlignParams BitmapWordAlign(const uint8_t* data, int64_t bit_offset,
int64_t length) {
static_assert(bit_util::IsPowerOf2(ALIGN_IN_BYTES),
"ALIGN_IN_BYTES should be a positive power of two");
constexpr uint64_t ALIGN_IN_BITS = ALIGN_IN_BYTES * 8;
BitmapWordAlignParams p;
// Compute a "bit address" that we can align up to ALIGN_IN_BITS.
// We don't care about losing the upper bits since we are only interested in the
// difference between both addresses.
const uint64_t bit_addr =
reinterpret_cast<size_t>(data) * 8 + static_cast<uint64_t>(bit_offset);
const uint64_t aligned_bit_addr = bit_util::RoundUpToPowerOf2(bit_addr, ALIGN_IN_BITS);
p.leading_bits = std::min<int64_t>(length, aligned_bit_addr - bit_addr);
p.aligned_words = (length - p.leading_bits) / ALIGN_IN_BITS;
p.aligned_bits = p.aligned_words * ALIGN_IN_BITS;
p.trailing_bits = length - p.leading_bits - p.aligned_bits;
p.trailing_bit_offset = bit_offset + p.leading_bits + p.aligned_bits;
p.aligned_start = data + (bit_offset + p.leading_bits) / 8;
return p;
}
} // namespace internal
} // namespace arrow
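
A rough sketch of how the returned parameters are typically consumed. CountSetBits is a hypothetical helper; it assumes arrow::bit_util::GetBit and arrow::bit_util::PopCount from arrow/util/bit_util.h, and that the aligned region may be read as 64-bit words when ALIGN_IN_BYTES is 8.

#include <cstdint>
#include "arrow/util/bit_util.h"

int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) {
  const auto p = arrow::internal::BitmapWordAlign<8>(data, bit_offset, length);
  int64_t count = 0;
  // 1. Leading unaligned bits, one at a time.
  for (int64_t i = 0; i < p.leading_bits; ++i) {
    count += arrow::bit_util::GetBit(data, bit_offset + i);
  }
  // 2. Word-aligned middle section, 64 bits at a time.
  const auto* words = reinterpret_cast<const uint64_t*>(p.aligned_start);
  for (int64_t i = 0; i < p.aligned_words; ++i) {
    count += arrow::bit_util::PopCount(words[i]);
  }
  // 3. Trailing unaligned bits.
  for (int64_t i = 0; i < p.trailing_bits; ++i) {
    count += arrow::bit_util::GetBit(data, p.trailing_bit_offset + i);
  }
  return count;
}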


@@ -0,0 +1,145 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstring>
#include <type_traits>
#include <utility>
#include "arrow/util/launder.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace internal {
template <typename T>
class AlignedStorage {
public:
static constexpr bool can_memcpy = std::is_trivial<T>::value;
constexpr T* get() noexcept {
return arrow::internal::launder(reinterpret_cast<T*>(&data_));
}
constexpr const T* get() const noexcept {
// Use fully qualified name to avoid ambiguities with MSVC (ARROW-14800)
return arrow::internal::launder(reinterpret_cast<const T*>(&data_));
}
void destroy() noexcept {
if (!std::is_trivially_destructible<T>::value) {
get()->~T();
}
}
template <typename... A>
void construct(A&&... args) noexcept {
new (&data_) T(std::forward<A>(args)...);
}
template <typename V>
void assign(V&& v) noexcept {
*get() = std::forward<V>(v);
}
void move_construct(AlignedStorage* other) noexcept {
new (&data_) T(std::move(*other->get()));
}
void move_assign(AlignedStorage* other) noexcept { *get() = std::move(*other->get()); }
template <bool CanMemcpy = can_memcpy>
static typename std::enable_if<CanMemcpy>::type move_construct_several(
AlignedStorage* ARROW_RESTRICT src, AlignedStorage* ARROW_RESTRICT dest, size_t n,
size_t memcpy_length) noexcept {
memcpy(dest->get(), src->get(), memcpy_length * sizeof(T));
}
template <bool CanMemcpy = can_memcpy>
static typename std::enable_if<CanMemcpy>::type
move_construct_several_and_destroy_source(AlignedStorage* ARROW_RESTRICT src,
AlignedStorage* ARROW_RESTRICT dest, size_t n,
size_t memcpy_length) noexcept {
memcpy(dest->get(), src->get(), memcpy_length * sizeof(T));
}
template <bool CanMemcpy = can_memcpy>
static typename std::enable_if<!CanMemcpy>::type move_construct_several(
AlignedStorage* ARROW_RESTRICT src, AlignedStorage* ARROW_RESTRICT dest, size_t n,
size_t memcpy_length) noexcept {
for (size_t i = 0; i < n; ++i) {
new (dest[i].get()) T(std::move(*src[i].get()));
}
}
template <bool CanMemcpy = can_memcpy>
static typename std::enable_if<!CanMemcpy>::type
move_construct_several_and_destroy_source(AlignedStorage* ARROW_RESTRICT src,
AlignedStorage* ARROW_RESTRICT dest, size_t n,
size_t memcpy_length) noexcept {
for (size_t i = 0; i < n; ++i) {
new (dest[i].get()) T(std::move(*src[i].get()));
src[i].destroy();
}
}
static void move_construct_several(AlignedStorage* ARROW_RESTRICT src,
AlignedStorage* ARROW_RESTRICT dest,
size_t n) noexcept {
move_construct_several(src, dest, n, n);
}
static void move_construct_several_and_destroy_source(
AlignedStorage* ARROW_RESTRICT src, AlignedStorage* ARROW_RESTRICT dest,
size_t n) noexcept {
move_construct_several_and_destroy_source(src, dest, n, n);
}
static void destroy_several(AlignedStorage* p, size_t n) noexcept {
if (!std::is_trivially_destructible<T>::value) {
for (size_t i = 0; i < n; ++i) {
p[i].destroy();
}
}
}
private:
#if !defined(__clang__) && defined(__GNUC__) && defined(__i386__)
// Workaround for GCC bug on i386:
// alignof(int64 | float64) can give different results depending on the
// compilation context, leading to internal ABI mismatch manifesting
// in incorrect propagation of Result<int64 | float64> between
// compilation units.
// (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88115)
static constexpr size_t alignment() {
if (std::is_integral_v<T> && sizeof(T) == 8) {
return 4;
} else if (std::is_floating_point_v<T> && sizeof(T) == 8) {
return 4;
}
return alignof(T);
}
typename std::aligned_storage<sizeof(T), alignment()>::type data_;
#else
typename std::aligned_storage<sizeof(T), alignof(T)>::type data_;
#endif
};
} // namespace internal
} // namespace arrow
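
A minimal sketch of the intended construct/access/destroy pairing for a non-trivial type. Illustrative only: the storage never constructs or destroys the object on its own, so the caller must pair construct() with destroy().

#include <string>

void AlignedStorageExample() {
  arrow::internal::AlignedStorage<std::string> slot;
  slot.construct("hello");       // placement-new into the raw storage
  slot.get()->append(" world");  // access the live object through get()
  slot.destroy();                // must be called explicitly for non-trivial T
}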

File diff suppressed because it is too large


@@ -0,0 +1,71 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <functional>
#include "arrow/type_fwd.h"
namespace arrow {
template <typename T>
using AsyncGenerator = std::function<Future<T>()>;
template <typename T, typename V>
class MappingGenerator;
template <typename T, typename ComesAfter, typename IsNext>
class SequencingGenerator;
template <typename T, typename V>
class TransformingGenerator;
template <typename T>
class SerialReadaheadGenerator;
template <typename T>
class ReadaheadGenerator;
template <typename T>
class PushGenerator;
template <typename T>
class MergedGenerator;
template <typename T>
struct Enumerated;
template <typename T>
class EnumeratingGenerator;
template <typename T>
class TransferringGenerator;
template <typename T>
class BackgroundGenerator;
template <typename T>
class GeneratorIterator;
template <typename T>
struct CancellableGenerator;
template <typename T>
class DefaultIfEmptyGenerator;
} // namespace arrow


@@ -0,0 +1,410 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic>
#include <functional>
#include <list>
#include <memory>
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/cancel.h"
#include "arrow/util/functional.h"
#include "arrow/util/future.h"
#include "arrow/util/iterator.h"
#include "arrow/util/mutex.h"
#include "arrow/util/thread_pool.h"
namespace arrow {
using internal::FnOnce;
namespace util {
/// A utility which keeps track of, and schedules, asynchronous tasks
///
/// An asynchronous task has a synchronous component and an asynchronous component.
/// The synchronous component typically schedules some kind of work on an external
/// resource (e.g. the I/O thread pool or some kind of kernel-based asynchronous
/// resource like io_uring). The asynchronous part represents the work
/// done on that external resource. Executing the synchronous part will be referred
/// to as "submitting the task" since this usually includes submitting the asynchronous
/// portion to the external thread pool.
///
/// By default the scheduler will submit the task (execute the synchronous part) as
/// soon as it is added, assuming the underlying thread pool hasn't terminated or the
/// scheduler hasn't aborted. In this mode, the scheduler effectively acts as
/// a simple task group.
///
/// A task scheduler starts with an initial task. That task, and all subsequent tasks,
/// are free to add subtasks. Once all submitted tasks finish, the scheduler will
/// finish. Note that it is not an error to add additional tasks after a scheduler has
/// aborted; these tasks will be ignored and never submitted. The scheduler returns a
/// future which will complete when all submitted tasks have finished executing. Once all
/// tasks have finished, the scheduler is invalid and should no longer be used.
///
/// Task failure (either the synchronous portion or the asynchronous portion) will cause
/// the scheduler to enter an aborted state. The first such failure will be reported in
/// the final task future.
class ARROW_EXPORT AsyncTaskScheduler {
public:
/// Destructor for AsyncTaskScheduler
///
/// The lifetime of the task scheduler is managed automatically. The scheduler
/// will remain valid while any tasks are running (and can always be safely accessed
/// within tasks) and will be destroyed as soon as all tasks have finished.
virtual ~AsyncTaskScheduler() = default;
/// An interface for a task
///
/// Users may want to override this, for example, to add priority
/// information for use by a queue.
class Task {
public:
virtual ~Task() = default;
/// Submit the task
///
/// This will be called by the scheduler at most once when there
/// is space to run the task. This is expected to be a fairly quick
/// function that simply submits the actual task work to an external
/// resource (e.g. I/O thread pool).
///
/// If this call fails then the scheduler will enter an aborted state.
virtual Result<Future<>> operator()() = 0;
/// The cost of the task
///
/// A ThrottledAsyncTaskScheduler can be used to limit the number of concurrent tasks.
/// A custom cost may be used, for example, if you would like to limit the number of
/// tasks based on the total expected RAM usage of the tasks (this is done in the
/// scanner)
virtual int cost() const { return 1; }
};
/// Add a task to the scheduler
///
/// If the scheduler is in an aborted state this call will return false and the task
/// will never be run. This is harmless and does not need to be guarded against.
///
/// The return value for this call can usually be ignored. There is little harm in
/// attempting to add tasks to an aborted scheduler. It is only included for callers
/// that want to avoid future task generation to save effort.
///
/// \param task the task to submit
///
/// \return true if the task was submitted or queued, false if the task was ignored
virtual bool AddTask(std::unique_ptr<Task> task) = 0;
/// Adds an async generator to the scheduler
///
/// The async generator will be visited, one item at a time. Submitting a task
/// will consist of polling the generator for the next future. The generator's future
/// will then represent the task itself.
///
/// This visits the task serially without readahead. If readahead or parallelism
/// is desired then it should be added in the generator itself.
///
/// The generator itself will be kept alive until all tasks have been completed.
/// However, if the scheduler is aborted, the generator will be destroyed as soon as the
/// next item would be requested.
///
/// \param generator the generator to submit to the scheduler
/// \param visitor a function which visits each generator future as it completes
template <typename T>
bool AddAsyncGenerator(std::function<Future<T>()> generator,
std::function<Status(const T&)> visitor);
template <typename Callable>
struct SimpleTask : public Task {
explicit SimpleTask(Callable callable) : callable(std::move(callable)) {}
Result<Future<>> operator()() override { return callable(); }
Callable callable;
};
/// Add a task with cost 1 to the scheduler
///
/// \see AddTask for details
template <typename Callable>
bool AddSimpleTask(Callable callable) {
return AddTask(std::make_unique<SimpleTask<Callable>>(std::move(callable)));
}
/// Construct a scheduler
///
/// \param initial_task The initial task which is responsible for adding
/// the first subtasks to the scheduler.
/// \param abort_callback A callback that will be triggered immediately after a task
/// fails while other tasks may still be running. Nothing needs to be done here;
/// when a task fails the scheduler will stop accepting new tasks and eventually
/// return the error. However, this callback can be used to more quickly end
/// long running tasks that have already been submitted. Defaults to doing
/// nothing.
/// \param stop_token An optional stop token that will allow cancellation of the
/// scheduler. This will be checked before each task is submitted and, in the
/// event of a cancellation, the scheduler will enter an aborted state. This is
/// a graceful cancellation and submitted tasks will still complete.
/// \return A future that will be completed when the initial task and all subtasks have
/// finished.
static Future<> Make(
FnOnce<Status(AsyncTaskScheduler*)> initial_task,
FnOnce<void(const Status&)> abort_callback = [](const Status&) {},
StopToken stop_token = StopToken::Unstoppable());
};
class ARROW_EXPORT ThrottledAsyncTaskScheduler : public AsyncTaskScheduler {
public:
/// An interface for a task queue
///
/// A queue's methods will not be called concurrently
class Queue {
public:
virtual ~Queue() = default;
/// Push a task to the queue
///
/// \param task the task to enqueue
virtual void Push(std::unique_ptr<Task> task) = 0;
/// Pop the next task from the queue
virtual std::unique_ptr<Task> Pop() = 0;
/// Peek the next task in the queue
virtual const Task& Peek() = 0;
/// Check if the queue is empty
virtual bool Empty() = 0;
/// Purge the queue of all items
virtual void Purge() = 0;
};
class Throttle {
public:
virtual ~Throttle() = default;
/// Acquire amt permits
///
/// If nullopt is returned then the permits were immediately
/// acquired and the caller can proceed. If a future is returned then the caller
/// should wait for the future to complete first. When the returned future completes
/// the permits have NOT been acquired and the caller must call TryAcquire again.
///
/// \param amt the number of permits to acquire
virtual std::optional<Future<>> TryAcquire(int amt) = 0;
/// Release amt permits
///
/// This will possibly complete waiting futures and should probably not be
/// called while holding locks.
///
/// \param amt the number of permits to release
virtual void Release(int amt) = 0;
/// The size of the largest task that can run
///
/// Incoming tasks will have their cost latched to this value to ensure
/// they can still run (although they will be the only thing allowed to
/// run at that time).
virtual int Capacity() = 0;
/// Pause the throttle
///
/// Any tasks that have been submitted already will continue. However, no new tasks
/// will be run until the throttle is resumed.
virtual void Pause() = 0;
/// Resume the throttle
///
/// Allows tasks to be submitted again. If there is a max_concurrent_cost limit then
/// it will still apply.
virtual void Resume() = 0;
};
/// Pause the throttle
///
/// Any tasks that have been submitted already will continue. However, no new tasks
/// will be run until the throttle is resumed.
virtual void Pause() = 0;
/// Resume the throttle
///
/// Allows tasks to be submitted again. If there is a max_concurrent_cost limit then
/// it will still apply.
virtual void Resume() = 0;
/// Create a throttled view of a scheduler
///
/// Tasks added via this view will be subjected to the throttle and, if the tasks cannot
/// run immediately, will be placed into a queue.
///
/// Although a shared_ptr is returned, it should generally be assumed that the caller
/// is being given exclusive ownership. The shared_ptr is used to share the view with
/// queued and submitted tasks, whose lifetime is unpredictable. It is important that
/// the caller keep the returned pointer alive for as long as they plan to add
/// tasks to the view.
///
/// \param scheduler a scheduler to submit tasks to after throttling
///
/// This can be the root scheduler, another throttled scheduler, or a task group. These
/// are all composable.
///
/// \param max_concurrent_cost the maximum amount of cost allowed to run at any one time
///
/// If a task is added that has a cost greater than max_concurrent_cost then its cost
/// will be reduced to max_concurrent_cost so that it is still possible for the task to
/// run.
///
/// \param queue the queue to use when tasks cannot be submitted
///
/// By default a FIFO queue will be used. However, a custom queue can be provided if
/// some tasks have higher priority than other tasks.
static std::shared_ptr<ThrottledAsyncTaskScheduler> Make(
AsyncTaskScheduler* scheduler, int max_concurrent_cost,
std::unique_ptr<Queue> queue = NULLPTR);
/// @brief Create a ThrottledAsyncTaskScheduler using a custom throttle
///
/// \see Make
static std::shared_ptr<ThrottledAsyncTaskScheduler> MakeWithCustomThrottle(
AsyncTaskScheduler* scheduler, std::unique_ptr<Throttle> throttle,
std::unique_ptr<Queue> queue = NULLPTR);
};
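// Usage sketch (illustrative only, not part of the API above): wrap an existing
// scheduler so that at most a total cost of 4 runs concurrently; tasks beyond
// that are queued until permits are released.
inline void ExampleThrottledUsage(AsyncTaskScheduler* scheduler) {
  std::shared_ptr<ThrottledAsyncTaskScheduler> throttled =
      ThrottledAsyncTaskScheduler::Make(scheduler, /*max_concurrent_cost=*/4);
  throttled->AddSimpleTask(
      [] { return Future<>::MakeFinished(Status::OK()); });
}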
/// A utility to keep track of a collection of tasks
///
/// Often it is useful to keep track of some state that only needs to stay alive
/// for some small collection of tasks, or to perform some kind of final cleanup
/// when a collection of tasks is finished.
///
/// For example, when scanning, we need to keep the file reader alive while all scan
/// tasks run for a given file, and then we can gracefully close it when we finish the
/// file.
class ARROW_EXPORT AsyncTaskGroup : public AsyncTaskScheduler {
public:
/// Destructor for the task group
///
/// The destructor might trigger the finish callback. If the finish callback fails
/// then the error will be reported as a task on the scheduler.
///
/// Failure to destroy the async task group will not prevent the scheduler from
/// finishing. If the scheduler finishes before the async task group is done then
/// the finish callback will be run immediately when the async task group finishes.
///
/// If the scheduler has aborted then the finish callback will not run.
~AsyncTaskGroup() = default;
/// Create an async task group
///
/// The finish callback will not run until the task group is destroyed and all
/// tasks are finished, so you will generally want to reset / destroy the returned
/// unique_ptr at some point.
///
/// \param scheduler The underlying scheduler to submit tasks to
/// \param finish_callback A callback that will be run only after the task group has
/// been destroyed and all tasks added by the group have
/// finished.
///
/// Note: in error scenarios the finish callback may not run. However, it will still,
/// of course, be destroyed.
static std::unique_ptr<AsyncTaskGroup> Make(AsyncTaskScheduler* scheduler,
FnOnce<Status()> finish_callback);
};
/// Create a task group that is also throttled
///
/// This is a utility factory that creates a throttled view of a scheduler and then
/// wraps that throttled view with a task group that destroys the throttle when finished.
///
/// \see ThrottledAsyncTaskScheduler
/// \see AsyncTaskGroup
/// \param target the underlying scheduler to submit tasks to
/// \param max_concurrent_cost the maximum amount of cost allowed to run at any one time
/// \param queue the queue to use when tasks cannot be submitted
/// \param finish_callback A callback that will be run only after the task group has
/// been destroyed and all tasks added by the group have finished
ARROW_EXPORT std::unique_ptr<ThrottledAsyncTaskScheduler> MakeThrottledAsyncTaskGroup(
AsyncTaskScheduler* target, int max_concurrent_cost,
std::unique_ptr<ThrottledAsyncTaskScheduler::Queue> queue,
FnOnce<Status()> finish_callback);
// Defined down here to avoid circular dependency between AsyncTaskScheduler and
// AsyncTaskGroup
template <typename T>
bool AsyncTaskScheduler::AddAsyncGenerator(std::function<Future<T>()> generator,
std::function<Status(const T&)> visitor) {
struct State {
State(std::function<Future<T>()> generator, std::function<Status(const T&)> visitor,
std::unique_ptr<AsyncTaskGroup> task_group)
: generator(std::move(generator)),
visitor(std::move(visitor)),
task_group(std::move(task_group)) {}
std::function<Future<T>()> generator;
std::function<Status(const T&)> visitor;
std::unique_ptr<AsyncTaskGroup> task_group;
};
struct SubmitTask : public Task {
explicit SubmitTask(std::unique_ptr<State> state_holder)
: state_holder(std::move(state_holder)) {}
struct SubmitTaskCallback {
SubmitTaskCallback(std::unique_ptr<State> state_holder, Future<> task_completion)
: state_holder(std::move(state_holder)),
task_completion(std::move(task_completion)) {}
void operator()(const Result<T>& maybe_item) {
if (!maybe_item.ok()) {
task_completion.MarkFinished(maybe_item.status());
return;
}
const auto& item = *maybe_item;
if (IsIterationEnd(item)) {
task_completion.MarkFinished();
return;
}
Status visit_st = state_holder->visitor(item);
if (!visit_st.ok()) {
task_completion.MarkFinished(std::move(visit_st));
return;
}
state_holder->task_group->AddTask(
std::make_unique<SubmitTask>(std::move(state_holder)));
task_completion.MarkFinished();
}
std::unique_ptr<State> state_holder;
Future<> task_completion;
};
Result<Future<>> operator()() {
Future<> task = Future<>::Make();
// Consume as many items as we can (those that are already finished)
// synchronously to avoid recursion / stack overflow.
while (true) {
Future<T> next = state_holder->generator();
if (next.TryAddCallback(
[&] { return SubmitTaskCallback(std::move(state_holder), task); })) {
return task;
}
ARROW_ASSIGN_OR_RAISE(T item, next.result());
if (IsIterationEnd(item)) {
task.MarkFinished();
return task;
}
ARROW_RETURN_NOT_OK(state_holder->visitor(item));
}
}
std::unique_ptr<State> state_holder;
};
std::unique_ptr<AsyncTaskGroup> task_group =
AsyncTaskGroup::Make(this, [] { return Status::OK(); });
AsyncTaskGroup* task_group_view = task_group.get();
std::unique_ptr<State> state_holder = std::make_unique<State>(
std::move(generator), std::move(visitor), std::move(task_group));
task_group_view->AddTask(std::make_unique<SubmitTask>(std::move(state_holder)));
return true;
}
} // namespace util
} // namespace arrow
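
A minimal end-to-end sketch, assuming the declarations above plus arrow/util/thread_pool.h are in scope: the initial task adds a few simple tasks whose synchronous part just submits work to the CPU thread pool, and the returned future completes once every task has finished. The work itself is left as a placeholder.

arrow::Status RunTasks() {
  auto* pool = arrow::internal::GetCpuThreadPool();
  arrow::Future<> all_done = arrow::util::AsyncTaskScheduler::Make(
      [pool](arrow::util::AsyncTaskScheduler* scheduler) -> arrow::Status {
        for (int i = 0; i < 4; ++i) {
          scheduler->AddSimpleTask([pool]() -> arrow::Result<arrow::Future<>> {
            // Synchronous part: hand the real work to the thread pool.
            return pool->Submit([] { /* do the actual work here */ });
          });
        }
        return arrow::Status::OK();
      });
  return all_done.status();  // blocks until all submitted tasks are done
}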


@@ -0,0 +1,35 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <string>
#include <string_view>
#include "arrow/util/visibility.h"
namespace arrow {
namespace util {
ARROW_EXPORT
std::string base64_encode(std::string_view s);
ARROW_EXPORT
std::string base64_decode(std::string_view s);
} // namespace util
} // namespace arrow
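
A trivial round-trip sketch, assuming the header above is included:

#include <cassert>
#include <string>

void Base64RoundTrip() {
  const std::string original = "hello arrow";
  const std::string encoded = arrow::util::base64_encode(original);
  const std::string decoded = arrow::util::base64_decode(encoded);
  assert(decoded == original);
}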


@@ -0,0 +1,474 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <array>
#include <cstdint>
#include <cstring>
#include <limits>
#include <string>
#include <type_traits>
#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
#include "arrow/util/type_traits.h"
#include "arrow/util/visibility.h"
namespace arrow {
enum class DecimalStatus {
kSuccess,
kDivideByZero,
kOverflow,
kRescaleDataLoss,
};
template <typename Derived, int BIT_WIDTH, int NWORDS = BIT_WIDTH / 64>
class ARROW_EXPORT GenericBasicDecimal {
protected:
struct LittleEndianArrayTag {};
#if ARROW_LITTLE_ENDIAN
static constexpr int kHighWordIndex = NWORDS - 1;
#else
static constexpr int kHighWordIndex = 0;
#endif
public:
static constexpr int kBitWidth = BIT_WIDTH;
static constexpr int kByteWidth = kBitWidth / 8;
// A constructor tag to introduce a little-endian encoded array
static constexpr LittleEndianArrayTag LittleEndianArray{};
using WordArray = std::array<uint64_t, NWORDS>;
/// \brief Empty constructor creates a decimal with a value of 0.
constexpr GenericBasicDecimal() noexcept : array_({0}) {}
/// \brief Create a decimal from the two's complement representation.
///
/// Input array is assumed to be in native endianness.
constexpr GenericBasicDecimal(
const WordArray& array) noexcept // NOLINT(runtime/explicit)
: array_(array) {}
/// \brief Create a decimal from the two's complement representation.
///
/// Input array is assumed to be in little endianness, with native endian elements.
GenericBasicDecimal(LittleEndianArrayTag, const WordArray& array) noexcept
: GenericBasicDecimal(bit_util::little_endian::ToNative(array)) {}
/// \brief Create a decimal from an array of bytes.
///
/// Bytes are assumed to be in native-endian byte order.
explicit GenericBasicDecimal(const uint8_t* bytes) {
memcpy(array_.data(), bytes, sizeof(array_));
}
/// \brief Get the bits of the two's complement representation of the number.
///
/// The elements are in native endian order. The bits within each uint64_t element
/// are in native endian order. For example, on a little endian machine,
/// BasicDecimal128(123).native_endian_array() = {123, 0};
/// but on a big endian machine,
/// BasicDecimal128(123).native_endian_array() = {0, 123};
constexpr const WordArray& native_endian_array() const { return array_; }
/// \brief Get the bits of the two's complement representation of the number.
///
/// The elements are in little endian order. However, the bits within each
/// uint64_t element are in native endian order.
/// For example, BasicDecimal128(123).little_endian_array() = {123, 0};
WordArray little_endian_array() const {
return bit_util::little_endian::FromNative(array_);
}
const uint8_t* native_endian_bytes() const {
return reinterpret_cast<const uint8_t*>(array_.data());
}
uint8_t* mutable_native_endian_bytes() {
return reinterpret_cast<uint8_t*>(array_.data());
}
/// \brief Return the raw bytes of the value in native-endian byte order.
std::array<uint8_t, kByteWidth> ToBytes() const {
std::array<uint8_t, kByteWidth> out{{0}};
memcpy(out.data(), array_.data(), kByteWidth);
return out;
}
/// \brief Copy the raw bytes of the value in native-endian byte order.
void ToBytes(uint8_t* out) const { memcpy(out, array_.data(), kByteWidth); }
/// Return 1 if positive or zero, -1 if strictly negative.
int64_t Sign() const {
return 1 | (static_cast<int64_t>(array_[kHighWordIndex]) >> 63);
}
bool IsNegative() const { return static_cast<int64_t>(array_[kHighWordIndex]) < 0; }
protected:
WordArray array_;
};
/// Represents a signed 128-bit integer in two's complement.
///
/// This class is also compiled into LLVM IR, so it should not reference C++ library
/// facilities such as streams or Boost.
class ARROW_EXPORT BasicDecimal128 : public GenericBasicDecimal<BasicDecimal128, 128> {
public:
static constexpr int kMaxPrecision = 38;
static constexpr int kMaxScale = 38;
using GenericBasicDecimal::GenericBasicDecimal;
constexpr BasicDecimal128() noexcept : GenericBasicDecimal() {}
/// \brief Create a BasicDecimal128 from the two's complement representation.
#if ARROW_LITTLE_ENDIAN
constexpr BasicDecimal128(int64_t high, uint64_t low) noexcept
: BasicDecimal128(WordArray{low, static_cast<uint64_t>(high)}) {}
#else
constexpr BasicDecimal128(int64_t high, uint64_t low) noexcept
: BasicDecimal128(WordArray{static_cast<uint64_t>(high), low}) {}
#endif
/// \brief Convert any integer value into a BasicDecimal128.
template <typename T,
typename = typename std::enable_if<
std::is_integral<T>::value && (sizeof(T) <= sizeof(uint64_t)), T>::type>
constexpr BasicDecimal128(T value) noexcept // NOLINT(runtime/explicit)
: BasicDecimal128(value >= T{0} ? 0 : -1, static_cast<uint64_t>(value)) { // NOLINT
}
/// \brief Negate the current value (in-place)
BasicDecimal128& Negate();
/// \brief Absolute value (in-place)
BasicDecimal128& Abs();
/// \brief Absolute value
static BasicDecimal128 Abs(const BasicDecimal128& left);
/// \brief Add a number to this one. The result is truncated to 128 bits.
BasicDecimal128& operator+=(const BasicDecimal128& right);
/// \brief Subtract a number from this one. The result is truncated to 128 bits.
BasicDecimal128& operator-=(const BasicDecimal128& right);
/// \brief Multiply this number by another number. The result is truncated to 128 bits.
BasicDecimal128& operator*=(const BasicDecimal128& right);
/// Divide this number by right and return the result.
///
/// This operation is not destructive.
/// The answer rounds to zero. Signs work like:
/// 21 / 5 -> 4, 1
/// -21 / 5 -> -4, -1
/// 21 / -5 -> -4, 1
/// -21 / -5 -> 4, -1
/// \param[in] divisor the number to divide by
/// \param[out] result the quotient
/// \param[out] remainder the remainder after the division
DecimalStatus Divide(const BasicDecimal128& divisor, BasicDecimal128* result,
BasicDecimal128* remainder) const;
/// \brief In-place division.
BasicDecimal128& operator/=(const BasicDecimal128& right);
/// \brief Bitwise "or" between two BasicDecimal128.
BasicDecimal128& operator|=(const BasicDecimal128& right);
/// \brief Bitwise "and" between two BasicDecimal128.
BasicDecimal128& operator&=(const BasicDecimal128& right);
/// \brief Shift left by the given number of bits.
BasicDecimal128& operator<<=(uint32_t bits);
BasicDecimal128 operator<<(uint32_t bits) const {
auto res = *this;
res <<= bits;
return res;
}
/// \brief Shift right by the given number of bits. Negative values will sign-extend.
BasicDecimal128& operator>>=(uint32_t bits);
BasicDecimal128 operator>>(uint32_t bits) const {
auto res = *this;
res >>= bits;
return res;
}
/// \brief Get the high bits of the two's complement representation of the number.
constexpr int64_t high_bits() const {
#if ARROW_LITTLE_ENDIAN
return static_cast<int64_t>(array_[1]);
#else
return static_cast<int64_t>(array_[0]);
#endif
}
/// \brief Get the low bits of the two's complement representation of the number.
constexpr uint64_t low_bits() const {
#if ARROW_LITTLE_ENDIAN
return array_[0];
#else
return array_[1];
#endif
}
/// \brief Separate the integer and fractional parts for the given scale.
void GetWholeAndFraction(int32_t scale, BasicDecimal128* whole,
BasicDecimal128* fraction) const;
/// \brief Scale multiplier for given scale value.
static const BasicDecimal128& GetScaleMultiplier(int32_t scale);
/// \brief Half-scale multiplier for given scale value.
static const BasicDecimal128& GetHalfScaleMultiplier(int32_t scale);
/// \brief Convert BasicDecimal128 from one scale to another
DecimalStatus Rescale(int32_t original_scale, int32_t new_scale,
BasicDecimal128* out) const;
/// \brief Scale up.
BasicDecimal128 IncreaseScaleBy(int32_t increase_by) const;
/// \brief Scale down.
/// - If 'round' is true, the right-most digits are dropped and the result value is
/// rounded up (+1 for +ve, -1 for -ve) based on the value of the dropped digits
/// (>= 10^reduce_by / 2).
/// - If 'round' is false, the right-most digits are simply dropped.
BasicDecimal128 ReduceScaleBy(int32_t reduce_by, bool round = true) const;
/// \brief Whether this number fits in the given precision
///
/// Return true if the number of significant digits is less or equal to `precision`.
bool FitsInPrecision(int32_t precision) const;
/// \brief Count the number of leading binary zeroes.
int32_t CountLeadingBinaryZeros() const;
/// \brief Get the maximum valid unscaled decimal value.
static const BasicDecimal128& GetMaxValue();
/// \brief Get the maximum valid unscaled decimal value for the given precision.
static BasicDecimal128 GetMaxValue(int32_t precision);
/// \brief Get the maximum decimal value (is not a valid value).
static constexpr BasicDecimal128 GetMaxSentinel() {
return BasicDecimal128(/*high=*/std::numeric_limits<int64_t>::max(),
/*low=*/std::numeric_limits<uint64_t>::max());
}
/// \brief Get the minimum decimal value (is not a valid value).
static constexpr BasicDecimal128 GetMinSentinel() {
return BasicDecimal128(/*high=*/std::numeric_limits<int64_t>::min(),
/*low=*/std::numeric_limits<uint64_t>::min());
}
};
ARROW_EXPORT bool operator==(const BasicDecimal128& left, const BasicDecimal128& right);
ARROW_EXPORT bool operator!=(const BasicDecimal128& left, const BasicDecimal128& right);
ARROW_EXPORT bool operator<(const BasicDecimal128& left, const BasicDecimal128& right);
ARROW_EXPORT bool operator<=(const BasicDecimal128& left, const BasicDecimal128& right);
ARROW_EXPORT bool operator>(const BasicDecimal128& left, const BasicDecimal128& right);
ARROW_EXPORT bool operator>=(const BasicDecimal128& left, const BasicDecimal128& right);
ARROW_EXPORT BasicDecimal128 operator-(const BasicDecimal128& operand);
ARROW_EXPORT BasicDecimal128 operator~(const BasicDecimal128& operand);
ARROW_EXPORT BasicDecimal128 operator+(const BasicDecimal128& left,
const BasicDecimal128& right);
ARROW_EXPORT BasicDecimal128 operator-(const BasicDecimal128& left,
const BasicDecimal128& right);
ARROW_EXPORT BasicDecimal128 operator*(const BasicDecimal128& left,
const BasicDecimal128& right);
ARROW_EXPORT BasicDecimal128 operator/(const BasicDecimal128& left,
const BasicDecimal128& right);
ARROW_EXPORT BasicDecimal128 operator%(const BasicDecimal128& left,
const BasicDecimal128& right);
class ARROW_EXPORT BasicDecimal256 : public GenericBasicDecimal<BasicDecimal256, 256> {
private:
// Due to a bug in clang, we have to declare the extend method prior to its
// usage.
template <typename T>
static constexpr uint64_t extend(T low_bits) noexcept {
return low_bits >= T() ? uint64_t{0} : ~uint64_t{0};
}
public:
using GenericBasicDecimal::GenericBasicDecimal;
static constexpr int kMaxPrecision = 76;
static constexpr int kMaxScale = 76;
constexpr BasicDecimal256() noexcept : GenericBasicDecimal() {}
/// \brief Convert any integer value into a BasicDecimal256.
template <typename T,
typename = typename std::enable_if<
std::is_integral<T>::value && (sizeof(T) <= sizeof(uint64_t)), T>::type>
constexpr BasicDecimal256(T value) noexcept // NOLINT(runtime/explicit)
: BasicDecimal256(bit_util::little_endian::ToNative<uint64_t, 4>(
{static_cast<uint64_t>(value), extend(value), extend(value),
extend(value)})) {}
explicit BasicDecimal256(const BasicDecimal128& value) noexcept
: BasicDecimal256(bit_util::little_endian::ToNative<uint64_t, 4>(
{value.low_bits(), static_cast<uint64_t>(value.high_bits()),
extend(value.high_bits()), extend(value.high_bits())})) {}
/// \brief Negate the current value (in-place)
BasicDecimal256& Negate();
/// \brief Absolute value (in-place)
BasicDecimal256& Abs();
/// \brief Absolute value
static BasicDecimal256 Abs(const BasicDecimal256& left);
/// \brief Add a number to this one. The result is truncated to 256 bits.
BasicDecimal256& operator+=(const BasicDecimal256& right);
/// \brief Subtract a number from this one. The result is truncated to 256 bits.
BasicDecimal256& operator-=(const BasicDecimal256& right);
/// \brief Get the lowest bits of the two's complement representation of the number.
uint64_t low_bits() const { return bit_util::little_endian::Make(array_)[0]; }
/// \brief Scale multiplier for given scale value.
static const BasicDecimal256& GetScaleMultiplier(int32_t scale);
/// \brief Half-scale multiplier for given scale value.
static const BasicDecimal256& GetHalfScaleMultiplier(int32_t scale);
/// \brief Convert BasicDecimal256 from one scale to another
DecimalStatus Rescale(int32_t original_scale, int32_t new_scale,
BasicDecimal256* out) const;
/// \brief Scale up.
BasicDecimal256 IncreaseScaleBy(int32_t increase_by) const;
/// \brief Scale down.
/// - If 'round' is true, the right-most digits are dropped and the result value is
/// rounded up (+1 for positive, -1 for negative) based on the value of the
/// dropped digits (>= 10^reduce_by / 2).
/// - If 'round' is false, the right-most digits are simply dropped.
BasicDecimal256 ReduceScaleBy(int32_t reduce_by, bool round = true) const;
/// \brief Whether this number fits in the given precision
///
/// Return true if the number of significant digits is less or equal to `precision`.
bool FitsInPrecision(int32_t precision) const;
/// \brief Multiply this number by another number. The result is truncated to 256 bits.
BasicDecimal256& operator*=(const BasicDecimal256& right);
/// Divide this number by right and return the result.
///
/// This operation is not destructive.
/// The answer rounds to zero. Signs work like:
/// 21 / 5 -> 4, 1
/// -21 / 5 -> -4, -1
/// 21 / -5 -> -4, 1
/// -21 / -5 -> 4, -1
/// \param[in] divisor the number to divide by
/// \param[out] result the quotient
/// \param[out] remainder the remainder after the division
DecimalStatus Divide(const BasicDecimal256& divisor, BasicDecimal256* result,
BasicDecimal256* remainder) const;
/// \brief Shift left by the given number of bits.
BasicDecimal256& operator<<=(uint32_t bits);
BasicDecimal256 operator<<(uint32_t bits) const {
auto res = *this;
res <<= bits;
return res;
}
/// \brief In-place division.
BasicDecimal256& operator/=(const BasicDecimal256& right);
/// \brief Get the maximum valid unscaled decimal value for the given precision.
static BasicDecimal256 GetMaxValue(int32_t precision);
/// \brief Get the maximum decimal value (is not a valid value).
static constexpr BasicDecimal256 GetMaxSentinel() {
#if ARROW_LITTLE_ENDIAN
return BasicDecimal256({std::numeric_limits<uint64_t>::max(),
std::numeric_limits<uint64_t>::max(),
std::numeric_limits<uint64_t>::max(),
static_cast<uint64_t>(std::numeric_limits<int64_t>::max())});
#else
return BasicDecimal256({static_cast<uint64_t>(std::numeric_limits<int64_t>::max()),
std::numeric_limits<uint64_t>::max(),
std::numeric_limits<uint64_t>::max(),
std::numeric_limits<uint64_t>::max()});
#endif
}
/// \brief Get the minimum decimal value (is not a valid value).
static constexpr BasicDecimal256 GetMinSentinel() {
#if ARROW_LITTLE_ENDIAN
return BasicDecimal256(
{0, 0, 0, static_cast<uint64_t>(std::numeric_limits<int64_t>::min())});
#else
return BasicDecimal256(
{static_cast<uint64_t>(std::numeric_limits<int64_t>::min()), 0, 0, 0});
#endif
}
};
ARROW_EXPORT inline bool operator==(const BasicDecimal256& left,
const BasicDecimal256& right) {
return left.native_endian_array() == right.native_endian_array();
}
ARROW_EXPORT inline bool operator!=(const BasicDecimal256& left,
const BasicDecimal256& right) {
return left.native_endian_array() != right.native_endian_array();
}
ARROW_EXPORT bool operator<(const BasicDecimal256& left, const BasicDecimal256& right);
ARROW_EXPORT inline bool operator<=(const BasicDecimal256& left,
const BasicDecimal256& right) {
return !operator<(right, left);
}
ARROW_EXPORT inline bool operator>(const BasicDecimal256& left,
const BasicDecimal256& right) {
return operator<(right, left);
}
ARROW_EXPORT inline bool operator>=(const BasicDecimal256& left,
const BasicDecimal256& right) {
return !operator<(left, right);
}
ARROW_EXPORT BasicDecimal256 operator-(const BasicDecimal256& operand);
ARROW_EXPORT BasicDecimal256 operator~(const BasicDecimal256& operand);
ARROW_EXPORT BasicDecimal256 operator+(const BasicDecimal256& left,
const BasicDecimal256& right);
ARROW_EXPORT BasicDecimal256 operator*(const BasicDecimal256& left,
const BasicDecimal256& right);
ARROW_EXPORT BasicDecimal256 operator/(const BasicDecimal256& left,
const BasicDecimal256& right);
} // namespace arrow
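
A small sketch of the unscaled two's-complement arithmetic these classes expose (illustrative only; scale and precision bookkeeping is handled elsewhere):

void DecimalExample() {
  arrow::BasicDecimal128 a = 123;                // sign-extended from an integer
  arrow::BasicDecimal128 b(/*high=*/-1, /*low=*/5);
  arrow::BasicDecimal128 product = a * arrow::BasicDecimal128(1000);
  arrow::BasicDecimal128 quotient, remainder;
  // Division truncates toward zero, as described in the Divide() docs above.
  arrow::DecimalStatus st =
      product.Divide(arrow::BasicDecimal128(7), &quotient, &remainder);
  (void)st;                        // kSuccess unless dividing by zero or overflowing
  bool negative = b.IsNegative();  // true: the high word is -1
  (void)negative;
}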


@@ -0,0 +1,139 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <algorithm>
#include <cstdint>
#include <string>
#include "benchmark/benchmark.h"
#include "arrow/util/cpu_info.h"
namespace arrow {
// Benchmark changed its parameter type between releases from
// int to int64_t. As it doesn't have version macros, we need
// to apply C++ template magic.
template <typename Func>
struct BenchmarkArgsType;
// Pattern matching that extracts the vector element type of Benchmark::Args()
template <typename Values>
struct BenchmarkArgsType<benchmark::internal::Benchmark* (
benchmark::internal::Benchmark::*)(const std::vector<Values>&)> {
using type = Values;
};
using ArgsType =
typename BenchmarkArgsType<decltype(&benchmark::internal::Benchmark::Args)>::type;
using internal::CpuInfo;
static const CpuInfo* cpu_info = CpuInfo::GetInstance();
static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L1);
static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L2);
static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::CacheLevel::L3);
static const int64_t kCantFitInL3Size = kL3Size * 4;
static const std::vector<int64_t> kMemorySizes = {kL1Size, kL2Size, kL3Size,
kCantFitInL3Size};
// 0 is treated as "no nulls"
static const std::vector<ArgsType> kInverseNullProportions = {10000, 100, 10, 2, 1, 0};
struct GenericItemsArgs {
// number of items processed per iteration
const int64_t size;
// proportion of nulls in generated arrays
double null_proportion;
explicit GenericItemsArgs(benchmark::State& state)
: size(state.range(0)), state_(state) {
if (state.range(1) == 0) {
this->null_proportion = 0.0;
} else {
this->null_proportion = std::min(1., 1. / static_cast<double>(state.range(1)));
}
}
~GenericItemsArgs() {
state_.counters["size"] = static_cast<double>(size);
state_.counters["null_percent"] = null_proportion * 100;
state_.SetItemsProcessed(state_.iterations() * size);
}
private:
benchmark::State& state_;
};
void BenchmarkSetArgsWithSizes(benchmark::internal::Benchmark* bench,
const std::vector<int64_t>& sizes = kMemorySizes) {
bench->Unit(benchmark::kMicrosecond);
for (const auto size : sizes) {
for (const auto inverse_null_proportion : kInverseNullProportions) {
bench->Args({static_cast<ArgsType>(size), inverse_null_proportion});
}
}
}
void BenchmarkSetArgs(benchmark::internal::Benchmark* bench) {
BenchmarkSetArgsWithSizes(bench, kMemorySizes);
}
void RegressionSetArgs(benchmark::internal::Benchmark* bench) {
// Regressions do not need to account for the cache hierarchy, thus we optimize for
// the best case.
BenchmarkSetArgsWithSizes(bench, {kL1Size});
}
// RAII struct to handle some of the boilerplate in regression benchmarks
struct RegressionArgs {
// size of memory tested (per iteration) in bytes
const int64_t size;
// proportion of nulls in generated arrays
double null_proportion;
// If size_is_bytes is true, then it's a number of bytes, otherwise it's the
// number of items processed (for reporting)
explicit RegressionArgs(benchmark::State& state, bool size_is_bytes = true)
: size(state.range(0)), state_(state), size_is_bytes_(size_is_bytes) {
if (state.range(1) == 0) {
this->null_proportion = 0.0;
} else {
this->null_proportion = std::min(1., 1. / static_cast<double>(state.range(1)));
}
}
~RegressionArgs() {
state_.counters["size"] = static_cast<double>(size);
state_.counters["null_percent"] = null_proportion * 100;
if (size_is_bytes_) {
state_.SetBytesProcessed(state_.iterations() * size);
} else {
state_.SetItemsProcessed(state_.iterations() * size);
}
}
private:
benchmark::State& state_;
bool size_is_bytes_;
};
} // namespace arrow
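
A hedged sketch of how these helpers are meant to be used with Google Benchmark. BM_CountSetBits and the data setup are placeholders; only RegressionArgs, RegressionSetArgs, and the counters they report come from the code above.

static void BM_CountSetBits(benchmark::State& state) {
  arrow::RegressionArgs args(state, /*size_is_bytes=*/true);
  // ... allocate `args.size` bytes of data with `args.null_proportion` nulls ...
  for (auto _ : state) {
    // ... run the kernel under test on the prepared data ...
    benchmark::DoNotOptimize(args.size);
  }
  // ~RegressionArgs() records the "size", "null_percent" and bytes-processed counters.
}
BENCHMARK(BM_CountSetBits)->Apply(arrow::RegressionSetArgs);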


@@ -0,0 +1,570 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include "arrow/buffer.h"
#include "arrow/status.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
#include "arrow/util/ubsan.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
namespace detail {
inline uint64_t LoadWord(const uint8_t* bytes) {
return bit_util::ToLittleEndian(util::SafeLoadAs<uint64_t>(bytes));
}
inline uint64_t ShiftWord(uint64_t current, uint64_t next, int64_t shift) {
if (shift == 0) {
return current;
}
return (current >> shift) | (next << (64 - shift));
}
// These templates are here to help with unit tests
template <typename T>
constexpr T BitNot(T x) {
return ~x;
}
template <>
constexpr bool BitNot(bool x) {
return !x;
}
struct BitBlockAnd {
template <typename T>
static constexpr T Call(T left, T right) {
return left & right;
}
};
struct BitBlockAndNot {
template <typename T>
static constexpr T Call(T left, T right) {
return left & BitNot(right);
}
};
struct BitBlockOr {
template <typename T>
static constexpr T Call(T left, T right) {
return left | right;
}
};
struct BitBlockOrNot {
template <typename T>
static constexpr T Call(T left, T right) {
return left | BitNot(right);
}
};
} // namespace detail
/// \brief Return value from bit block counters: the total number of bits and
/// the number of set bits.
struct BitBlockCount {
int16_t length;
int16_t popcount;
bool NoneSet() const { return this->popcount == 0; }
bool AllSet() const { return this->length == this->popcount; }
};
/// \brief A class that scans through a true/false bitmap to compute popcounts
/// 64 or 256 bits at a time. This is used to accelerate processing of
/// mostly-not-null array data.
class ARROW_EXPORT BitBlockCounter {
public:
BitBlockCounter(const uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(util::MakeNonNull(bitmap) + start_offset / 8),
bits_remaining_(length),
offset_(start_offset % 8) {}
/// \brief The bit size of each word run
static constexpr int64_t kWordBits = 64;
/// \brief The bit size of four words run
static constexpr int64_t kFourWordsBits = kWordBits * 4;
/// \brief Return the next run of available bits, usually 256. The returned
/// pair contains the size of run and the number of true values. The last
/// block will have a length less than 256 if the bitmap length is not a
/// multiple of 256, and will return 0-length blocks in subsequent
/// invocations.
BitBlockCount NextFourWords() {
using detail::LoadWord;
using detail::ShiftWord;
if (!bits_remaining_) {
return {0, 0};
}
int64_t total_popcount = 0;
if (offset_ == 0) {
if (bits_remaining_ < kFourWordsBits) {
return GetBlockSlow(kFourWordsBits);
}
total_popcount += bit_util::PopCount(LoadWord(bitmap_));
total_popcount += bit_util::PopCount(LoadWord(bitmap_ + 8));
total_popcount += bit_util::PopCount(LoadWord(bitmap_ + 16));
total_popcount += bit_util::PopCount(LoadWord(bitmap_ + 24));
} else {
// When the offset is > 0, we need there to be a word beyond the last
// aligned word in the bitmap for the bit shifting logic.
if (bits_remaining_ < 5 * kFourWordsBits - offset_) {
return GetBlockSlow(kFourWordsBits);
}
auto current = LoadWord(bitmap_);
auto next = LoadWord(bitmap_ + 8);
total_popcount += bit_util::PopCount(ShiftWord(current, next, offset_));
current = next;
next = LoadWord(bitmap_ + 16);
total_popcount += bit_util::PopCount(ShiftWord(current, next, offset_));
current = next;
next = LoadWord(bitmap_ + 24);
total_popcount += bit_util::PopCount(ShiftWord(current, next, offset_));
current = next;
next = LoadWord(bitmap_ + 32);
total_popcount += bit_util::PopCount(ShiftWord(current, next, offset_));
}
bitmap_ += bit_util::BytesForBits(kFourWordsBits);
bits_remaining_ -= kFourWordsBits;
return {256, static_cast<int16_t>(total_popcount)};
}
/// \brief Return the next run of available bits, usually 64. The returned
/// pair contains the size of run and the number of true values. The last
/// block will have a length less than 64 if the bitmap length is not a
/// multiple of 64, and will return 0-length blocks in subsequent
/// invocations.
BitBlockCount NextWord() {
using detail::LoadWord;
using detail::ShiftWord;
if (!bits_remaining_) {
return {0, 0};
}
int64_t popcount = 0;
if (offset_ == 0) {
if (bits_remaining_ < kWordBits) {
return GetBlockSlow(kWordBits);
}
popcount = bit_util::PopCount(LoadWord(bitmap_));
} else {
// When the offset is > 0, we need there to be a word beyond the last
// aligned word in the bitmap for the bit shifting logic.
if (bits_remaining_ < 2 * kWordBits - offset_) {
return GetBlockSlow(kWordBits);
}
popcount = bit_util::PopCount(
ShiftWord(LoadWord(bitmap_), LoadWord(bitmap_ + 8), offset_));
}
bitmap_ += kWordBits / 8;
bits_remaining_ -= kWordBits;
return {64, static_cast<int16_t>(popcount)};
}
private:
/// \brief Return block with the requested size when doing word-wise
/// computation is not possible due to inadequate bits remaining.
BitBlockCount GetBlockSlow(int64_t block_size) noexcept;
const uint8_t* bitmap_;
int64_t bits_remaining_;
int64_t offset_;
};
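// Usage sketch (illustrative only, not part of the classes above): tally the
// set bits of a validity bitmap one 64-bit word at a time, stopping when a
// zero-length block signals that the bitmap is exhausted.
inline int64_t ExampleTallySetBits(const uint8_t* bitmap, int64_t offset,
                                   int64_t length) {
  BitBlockCounter counter(bitmap, offset, length);
  int64_t total = 0;
  while (true) {
    BitBlockCount block = counter.NextWord();
    if (block.length == 0) break;  // no bits left
    total += block.popcount;       // set bits in this run (at most 64)
  }
  return total;
}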
/// \brief A tool to iterate through a possibly non-existent validity bitmap,
/// to allow us to write one code path for both the with-nulls and no-nulls
/// cases without giving up a lot of performance.
class ARROW_EXPORT OptionalBitBlockCounter {
public:
// validity_bitmap may be NULLPTR
OptionalBitBlockCounter(const uint8_t* validity_bitmap, int64_t offset, int64_t length);
// validity_bitmap may be null
OptionalBitBlockCounter(const std::shared_ptr<Buffer>& validity_bitmap, int64_t offset,
int64_t length);
/// Return the block count for the next word when the bitmap is available; otherwise
/// return a block with length up to INT16_MAX when there is no validity
/// bitmap (so all the referenced values are non-null).
BitBlockCount NextBlock() {
static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
if (has_bitmap_) {
BitBlockCount block = counter_.NextWord();
position_ += block.length;
return block;
} else {
int16_t block_size =
static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
position_ += block_size;
// All values are non-null
return {block_size, block_size};
}
}
// Like NextBlock, but returns a word-sized block even when there is no
// validity bitmap
BitBlockCount NextWord() {
static constexpr int64_t kWordSize = 64;
if (has_bitmap_) {
BitBlockCount block = counter_.NextWord();
position_ += block.length;
return block;
} else {
int16_t block_size = static_cast<int16_t>(std::min(kWordSize, length_ - position_));
position_ += block_size;
// All values are non-null
return {block_size, block_size};
}
}
private:
const bool has_bitmap_;
int64_t position_;
int64_t length_;
BitBlockCounter counter_;
};
/// \brief A class that computes popcounts on the result of bitwise operations
/// between two bitmaps, 64 bits at a time. A 64-bit word is loaded from each
/// bitmap, then the popcount is computed on e.g. the bitwise-and of the two
/// words.
class ARROW_EXPORT BinaryBitBlockCounter {
public:
BinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset,
const uint8_t* right_bitmap, int64_t right_offset, int64_t length)
: left_bitmap_(util::MakeNonNull(left_bitmap) + left_offset / 8),
left_offset_(left_offset % 8),
right_bitmap_(util::MakeNonNull(right_bitmap) + right_offset / 8),
right_offset_(right_offset % 8),
bits_remaining_(length) {}
/// \brief Return the popcount of the bitwise-and of the next run of
/// available bits, up to 64. The returned pair contains the size of the run
/// and the number of true values. The last block will have a length less
/// than 64 if the bitmap length is not a multiple of 64, and 0-length blocks
/// will be returned in subsequent invocations.
BitBlockCount NextAndWord() { return NextWord<detail::BitBlockAnd>(); }
/// \brief Computes "x & ~y" block for each available run of bits.
BitBlockCount NextAndNotWord() { return NextWord<detail::BitBlockAndNot>(); }
/// \brief Computes "x | y" block for each available run of bits.
BitBlockCount NextOrWord() { return NextWord<detail::BitBlockOr>(); }
/// \brief Computes "x | ~y" block for each available run of bits.
BitBlockCount NextOrNotWord() { return NextWord<detail::BitBlockOrNot>(); }
private:
template <class Op>
BitBlockCount NextWord() {
using detail::LoadWord;
using detail::ShiftWord;
if (!bits_remaining_) {
return {0, 0};
}
// When the offset is > 0, we need there to be a word beyond the last aligned
// word in the bitmap for the bit shifting logic.
constexpr int64_t kWordBits = BitBlockCounter::kWordBits;
const int64_t bits_required_to_use_words =
std::max(left_offset_ == 0 ? 64 : 64 + (64 - left_offset_),
right_offset_ == 0 ? 64 : 64 + (64 - right_offset_));
if (bits_remaining_ < bits_required_to_use_words) {
const int16_t run_length =
static_cast<int16_t>(std::min(bits_remaining_, kWordBits));
int16_t popcount = 0;
for (int64_t i = 0; i < run_length; ++i) {
if (Op::Call(bit_util::GetBit(left_bitmap_, left_offset_ + i),
bit_util::GetBit(right_bitmap_, right_offset_ + i))) {
++popcount;
}
}
// This code path should trigger _at most_ 2 times. In the "two times"
// case, the first time the run length will be a multiple of 8.
left_bitmap_ += run_length / 8;
right_bitmap_ += run_length / 8;
bits_remaining_ -= run_length;
return {run_length, popcount};
}
int64_t popcount = 0;
if (left_offset_ == 0 && right_offset_ == 0) {
popcount =
bit_util::PopCount(Op::Call(LoadWord(left_bitmap_), LoadWord(right_bitmap_)));
} else {
auto left_word =
ShiftWord(LoadWord(left_bitmap_), LoadWord(left_bitmap_ + 8), left_offset_);
auto right_word =
ShiftWord(LoadWord(right_bitmap_), LoadWord(right_bitmap_ + 8), right_offset_);
popcount = bit_util::PopCount(Op::Call(left_word, right_word));
}
left_bitmap_ += kWordBits / 8;
right_bitmap_ += kWordBits / 8;
bits_remaining_ -= kWordBits;
return {64, static_cast<int16_t>(popcount)};
}
const uint8_t* left_bitmap_;
int64_t left_offset_;
const uint8_t* right_bitmap_;
int64_t right_offset_;
int64_t bits_remaining_;
};
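// Usage sketch: counting the positions where two validity bitmaps are both set,
// e.g. rows that are non-null in both inputs of a binary kernel. The `popcount`
// member name is assumed from BitBlockCount as used above.
//
//   int64_t CountBothValid(const uint8_t* left, int64_t left_offset,
//                          const uint8_t* right, int64_t right_offset,
//                          int64_t length) {
//     arrow::internal::BinaryBitBlockCounter counter(left, left_offset,
//                                                    right, right_offset, length);
//     int64_t both_valid = 0;
//     while (true) {
//       arrow::internal::BitBlockCount block = counter.NextAndWord();
//       if (block.length == 0) break;
//       both_valid += block.popcount;
//     }
//     return both_valid;
//   }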
class ARROW_EXPORT OptionalBinaryBitBlockCounter {
public:
// Any bitmap may be NULLPTR
OptionalBinaryBitBlockCounter(const uint8_t* left_bitmap, int64_t left_offset,
const uint8_t* right_bitmap, int64_t right_offset,
int64_t length);
// Any bitmap may be null
OptionalBinaryBitBlockCounter(const std::shared_ptr<Buffer>& left_bitmap,
int64_t left_offset,
const std::shared_ptr<Buffer>& right_bitmap,
int64_t right_offset, int64_t length);
BitBlockCount NextAndBlock() {
static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
switch (has_bitmap_) {
case HasBitmap::BOTH: {
BitBlockCount block = binary_counter_.NextAndWord();
position_ += block.length;
return block;
}
case HasBitmap::ONE: {
BitBlockCount block = unary_counter_.NextWord();
position_ += block.length;
return block;
}
case HasBitmap::NONE:
default: {
const int16_t block_size =
static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
position_ += block_size;
// All values are non-null
return {block_size, block_size};
}
}
}
BitBlockCount NextOrNotBlock() {
static constexpr int64_t kMaxBlockSize = std::numeric_limits<int16_t>::max();
switch (has_bitmap_) {
case HasBitmap::BOTH: {
BitBlockCount block = binary_counter_.NextOrNotWord();
position_ += block.length;
return block;
}
case HasBitmap::ONE: {
BitBlockCount block = unary_counter_.NextWord();
position_ += block.length;
return block;
}
case HasBitmap::NONE:
default: {
const int16_t block_size =
static_cast<int16_t>(std::min(kMaxBlockSize, length_ - position_));
position_ += block_size;
// All values are non-null
return {block_size, block_size};
}
}
}
private:
enum class HasBitmap : int { BOTH, ONE, NONE };
const HasBitmap has_bitmap_;
int64_t position_;
int64_t length_;
BitBlockCounter unary_counter_;
BinaryBitBlockCounter binary_counter_;
static HasBitmap HasBitmapFromBitmaps(bool has_left, bool has_right) {
switch (static_cast<int>(has_left) + static_cast<int>(has_right)) {
case 0:
return HasBitmap::NONE;
case 1:
return HasBitmap::ONE;
default: // 2
return HasBitmap::BOTH;
}
}
};
// Functional-style bit block visitors.
template <typename VisitNotNull, typename VisitNull>
static Status VisitBitBlocks(const uint8_t* bitmap, int64_t offset, int64_t length,
VisitNotNull&& visit_not_null, VisitNull&& visit_null) {
internal::OptionalBitBlockCounter bit_counter(bitmap, offset, length);
int64_t position = 0;
while (position < length) {
internal::BitBlockCount block = bit_counter.NextBlock();
if (block.AllSet()) {
for (int64_t i = 0; i < block.length; ++i, ++position) {
ARROW_RETURN_NOT_OK(visit_not_null(position));
}
} else if (block.NoneSet()) {
for (int64_t i = 0; i < block.length; ++i, ++position) {
ARROW_RETURN_NOT_OK(visit_null());
}
} else {
for (int64_t i = 0; i < block.length; ++i, ++position) {
if (bit_util::GetBit(bitmap, offset + position)) {
ARROW_RETURN_NOT_OK(visit_not_null(position));
} else {
ARROW_RETURN_NOT_OK(visit_null());
}
}
}
}
return Status::OK();
}
template <typename VisitNotNull, typename VisitNull>
static void VisitBitBlocksVoid(const uint8_t* bitmap, int64_t offset, int64_t length,
VisitNotNull&& visit_not_null, VisitNull&& visit_null) {
internal::OptionalBitBlockCounter bit_counter(bitmap, offset, length);
int64_t position = 0;
while (position < length) {
internal::BitBlockCount block = bit_counter.NextBlock();
if (block.AllSet()) {
for (int64_t i = 0; i < block.length; ++i, ++position) {
visit_not_null(position);
}
} else if (block.NoneSet()) {
for (int64_t i = 0; i < block.length; ++i, ++position) {
visit_null();
}
} else {
for (int64_t i = 0; i < block.length; ++i, ++position) {
if (bit_util::GetBit(bitmap, offset + position)) {
visit_not_null(position);
} else {
visit_null();
}
}
}
}
}
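// Usage sketch: summing the non-null entries of a hypothetical `values` array
// with VisitBitBlocksVoid. The all-set / none-set fast paths above make this as
// cheap as a plain loop when the data contains no (or only) nulls.
//
//   double SumValid(const double* values, const uint8_t* validity_bitmap,
//                   int64_t offset, int64_t length) {
//     double sum = 0;
//     arrow::internal::VisitBitBlocksVoid(
//         validity_bitmap, offset, length,
//         [&](int64_t position) { sum += values[position]; },  // non-null entry
//         []() {});                                            // null entry: skip
//     return sum;
//   }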
template <typename VisitNotNull, typename VisitNull>
static Status VisitTwoBitBlocks(const uint8_t* left_bitmap, int64_t left_offset,
const uint8_t* right_bitmap, int64_t right_offset,
int64_t length, VisitNotNull&& visit_not_null,
VisitNull&& visit_null) {
if (left_bitmap == NULLPTR || right_bitmap == NULLPTR) {
// At most one bitmap is present
if (left_bitmap == NULLPTR) {
return VisitBitBlocks(right_bitmap, right_offset, length,
std::forward<VisitNotNull>(visit_not_null),
std::forward<VisitNull>(visit_null));
} else {
return VisitBitBlocks(left_bitmap, left_offset, length,
std::forward<VisitNotNull>(visit_not_null),
std::forward<VisitNull>(visit_null));
}
}
BinaryBitBlockCounter bit_counter(left_bitmap, left_offset, right_bitmap, right_offset,
length);
int64_t position = 0;
while (position < length) {
BitBlockCount block = bit_counter.NextAndWord();
if (block.AllSet()) {
for (int64_t i = 0; i < block.length; ++i, ++position) {
ARROW_RETURN_NOT_OK(visit_not_null(position));
}
} else if (block.NoneSet()) {
for (int64_t i = 0; i < block.length; ++i, ++position) {
ARROW_RETURN_NOT_OK(visit_null());
}
} else {
for (int64_t i = 0; i < block.length; ++i, ++position) {
if (bit_util::GetBit(left_bitmap, left_offset + position) &&
bit_util::GetBit(right_bitmap, right_offset + position)) {
ARROW_RETURN_NOT_OK(visit_not_null(position));
} else {
ARROW_RETURN_NOT_OK(visit_null());
}
}
}
}
return Status::OK();
}
template <typename VisitNotNull, typename VisitNull>
static void VisitTwoBitBlocksVoid(const uint8_t* left_bitmap, int64_t left_offset,
const uint8_t* right_bitmap, int64_t right_offset,
int64_t length, VisitNotNull&& visit_not_null,
VisitNull&& visit_null) {
if (left_bitmap == NULLPTR || right_bitmap == NULLPTR) {
// At most one bitmap is present
if (left_bitmap == NULLPTR) {
return VisitBitBlocksVoid(right_bitmap, right_offset, length,
std::forward<VisitNotNull>(visit_not_null),
std::forward<VisitNull>(visit_null));
} else {
return VisitBitBlocksVoid(left_bitmap, left_offset, length,
std::forward<VisitNotNull>(visit_not_null),
std::forward<VisitNull>(visit_null));
}
}
BinaryBitBlockCounter bit_counter(left_bitmap, left_offset, right_bitmap, right_offset,
length);
int64_t position = 0;
while (position < length) {
BitBlockCount block = bit_counter.NextAndWord();
if (block.AllSet()) {
for (int64_t i = 0; i < block.length; ++i, ++position) {
visit_not_null(position);
}
} else if (block.NoneSet()) {
for (int64_t i = 0; i < block.length; ++i, ++position) {
visit_null();
}
} else {
for (int64_t i = 0; i < block.length; ++i, ++position) {
if (bit_util::GetBit(left_bitmap, left_offset + position) &&
bit_util::GetBit(right_bitmap, right_offset + position)) {
visit_not_null(position);
} else {
visit_null();
}
}
}
}
}
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,515 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cassert>
#include <cstdint>
#include <cstring>
#include <string>
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_reader.h"
#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
struct BitRun {
int64_t length;
// Whether bits are set at this point.
bool set;
std::string ToString() const {
return std::string("{Length: ") + std::to_string(length) +
", set=" + std::to_string(set) + "}";
}
};
inline bool operator==(const BitRun& lhs, const BitRun& rhs) {
return lhs.length == rhs.length && lhs.set == rhs.set;
}
inline bool operator!=(const BitRun& lhs, const BitRun& rhs) {
return lhs.length != rhs.length || lhs.set != rhs.set;
}
class BitRunReaderLinear {
public:
BitRunReaderLinear(const uint8_t* bitmap, int64_t start_offset, int64_t length)
: reader_(bitmap, start_offset, length) {}
BitRun NextRun() {
BitRun rl = {/*length=*/0, reader_.IsSet()};
// Advance while the values are equal and we are not at the end of the list.
while (reader_.position() < reader_.length() && reader_.IsSet() == rl.set) {
rl.length++;
reader_.Next();
}
return rl;
}
private:
BitmapReader reader_;
};
#if ARROW_LITTLE_ENDIAN
/// A convenience class for counting the number of contiguous set/unset bits
/// in a bitmap.
class ARROW_EXPORT BitRunReader {
public:
/// \brief Constructs new BitRunReader.
///
/// \param[in] bitmap source data
/// \param[in] start_offset bit offset into the source data
/// \param[in] length number of bits to read
BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length);
/// Returns a new BitRun containing the number of contiguous
/// bits with the same value. length == 0 indicates the
/// end of the bitmap.
BitRun NextRun() {
if (ARROW_PREDICT_FALSE(position_ >= length_)) {
return {/*length=*/0, false};
}
// This implementation relies on an efficient implementation of
// CountTrailingZeros and assumes that runs are more often long than
// not. The logic is to incrementally find the next bit change
// from the current position. This is done by zeroing all
// bits in word_ up to position_ and using CountTrailingZeros
// to find the index of the next set bit.
// The runs alternate on each call, so flip the bit.
current_run_bit_set_ = !current_run_bit_set_;
int64_t start_position = position_;
int64_t start_bit_offset = start_position & 63;
// Invert the word for proper use of CountTrailingZeros and
// clear bits so CountTrailingZeros can do its magic.
word_ = ~word_ & ~bit_util::LeastSignificantBitMask(start_bit_offset);
// Go forward until the next change from unset to set.
int64_t new_bits = bit_util::CountTrailingZeros(word_) - start_bit_offset;
position_ += new_bits;
if (ARROW_PREDICT_FALSE(bit_util::IsMultipleOf64(position_)) &&
ARROW_PREDICT_TRUE(position_ < length_)) {
// Continue extending position while we can advance an entire word.
// (updates position_ accordingly).
AdvanceUntilChange();
}
return {/*length=*/position_ - start_position, current_run_bit_set_};
}
private:
void AdvanceUntilChange() {
int64_t new_bits = 0;
do {
// Advance the position of the bitmap for loading.
bitmap_ += sizeof(uint64_t);
LoadNextWord();
new_bits = bit_util::CountTrailingZeros(word_);
// Continue calculating run length.
position_ += new_bits;
} while (ARROW_PREDICT_FALSE(bit_util::IsMultipleOf64(position_)) &&
ARROW_PREDICT_TRUE(position_ < length_) && new_bits > 0);
}
void LoadNextWord() { return LoadWord(length_ - position_); }
// Helper method for loading the next word.
void LoadWord(int64_t bits_remaining) {
word_ = 0;
// we need at least an extra byte in this case.
if (ARROW_PREDICT_TRUE(bits_remaining >= 64)) {
std::memcpy(&word_, bitmap_, 8);
} else {
int64_t bytes_to_load = bit_util::BytesForBits(bits_remaining);
auto word_ptr = reinterpret_cast<uint8_t*>(&word_);
std::memcpy(word_ptr, bitmap_, bytes_to_load);
// Ensure we stop at the last bit in the bitmap by inverting the next
// higher-order bit.
bit_util::SetBitTo(word_ptr, bits_remaining,
!bit_util::GetBit(word_ptr, bits_remaining - 1));
}
// Two cases:
// 1. For unset, CountTrailingZeros works naturally so we don't
// invert the word.
// 2. Otherwise invert so we can use CountTrailingZeros.
if (current_run_bit_set_) {
word_ = ~word_;
}
}
const uint8_t* bitmap_;
int64_t position_;
int64_t length_;
uint64_t word_;
bool current_run_bit_set_;
};
#else
using BitRunReader = BitRunReaderLinear;
#endif
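// Usage sketch: iterating over runs of identical bits. Each BitRun reports how
// many consecutive bits share the value `set`; a zero-length run signals the end
// of the bitmap.
//
//   int64_t LongestSetRun(const uint8_t* bitmap, int64_t offset, int64_t length) {
//     arrow::internal::BitRunReader reader(bitmap, offset, length);
//     int64_t longest = 0;
//     while (true) {
//       arrow::internal::BitRun run = reader.NextRun();
//       if (run.length == 0) break;  // end of bitmap
//       if (run.set && run.length > longest) longest = run.length;
//     }
//     return longest;
//   }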
struct SetBitRun {
int64_t position;
int64_t length;
bool AtEnd() const { return length == 0; }
std::string ToString() const {
return std::string("{pos=") + std::to_string(position) +
", len=" + std::to_string(length) + "}";
}
bool operator==(const SetBitRun& other) const {
return position == other.position && length == other.length;
}
bool operator!=(const SetBitRun& other) const {
return position != other.position || length != other.length;
}
};
template <bool Reverse>
class BaseSetBitRunReader {
public:
/// \brief Constructs new SetBitRunReader.
///
/// \param[in] bitmap source data
/// \param[in] start_offset bit offset into the source data
/// \param[in] length number of bits to read
ARROW_NOINLINE
BaseSetBitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(util::MakeNonNull(bitmap)),
length_(length),
remaining_(length_),
current_word_(0),
current_num_bits_(0) {
if (Reverse) {
bitmap_ += (start_offset + length) / 8;
const int8_t end_bit_offset = static_cast<int8_t>((start_offset + length) % 8);
if (length > 0 && end_bit_offset) {
// Get LSBs from last byte
++bitmap_;
current_num_bits_ =
std::min(static_cast<int32_t>(length), static_cast<int32_t>(end_bit_offset));
current_word_ = LoadPartialWord(8 - end_bit_offset, current_num_bits_);
}
} else {
bitmap_ += start_offset / 8;
const int8_t bit_offset = static_cast<int8_t>(start_offset % 8);
if (length > 0 && bit_offset) {
// Get MSBs from first byte
current_num_bits_ =
std::min(static_cast<int32_t>(length), static_cast<int32_t>(8 - bit_offset));
current_word_ = LoadPartialWord(bit_offset, current_num_bits_);
}
}
}
ARROW_NOINLINE
SetBitRun NextRun() {
int64_t pos = 0;
int64_t len = 0;
if (current_num_bits_) {
const auto run = FindCurrentRun();
assert(remaining_ >= 0);
if (run.length && current_num_bits_) {
// The run ends in current_word_
return AdjustRun(run);
}
pos = run.position;
len = run.length;
}
if (!len) {
// We didn't get any ones in current_word_, so we can skip any zeros
// in the following words
SkipNextZeros();
if (remaining_ == 0) {
return {0, 0};
}
assert(current_num_bits_);
pos = position();
} else if (!current_num_bits_) {
if (ARROW_PREDICT_TRUE(remaining_ >= 64)) {
current_word_ = LoadFullWord();
current_num_bits_ = 64;
} else if (remaining_ > 0) {
current_word_ = LoadPartialWord(/*bit_offset=*/0, remaining_);
current_num_bits_ = static_cast<int32_t>(remaining_);
} else {
// No bits remaining, perhaps we found a run?
return AdjustRun({pos, len});
}
// If current word starts with a zero, we got a full run
if (!(current_word_ & kFirstBit)) {
return AdjustRun({pos, len});
}
}
// Current word should now start with a set bit
len += CountNextOnes();
return AdjustRun({pos, len});
}
protected:
int64_t position() const {
if (Reverse) {
return remaining_;
} else {
return length_ - remaining_;
}
}
SetBitRun AdjustRun(SetBitRun run) {
if (Reverse) {
assert(run.position >= run.length);
run.position -= run.length;
}
return run;
}
uint64_t LoadFullWord() {
uint64_t word;
if (Reverse) {
bitmap_ -= 8;
}
memcpy(&word, bitmap_, 8);
if (!Reverse) {
bitmap_ += 8;
}
return bit_util::ToLittleEndian(word);
}
uint64_t LoadPartialWord(int8_t bit_offset, int64_t num_bits) {
assert(num_bits > 0);
uint64_t word = 0;
const int64_t num_bytes = bit_util::BytesForBits(num_bits);
if (Reverse) {
// Read in the most significant bytes of the word
bitmap_ -= num_bytes;
memcpy(reinterpret_cast<char*>(&word) + 8 - num_bytes, bitmap_, num_bytes);
// XXX MostSignificantBitmask
return (bit_util::ToLittleEndian(word) << bit_offset) &
~bit_util::LeastSignificantBitMask(64 - num_bits);
} else {
memcpy(&word, bitmap_, num_bytes);
bitmap_ += num_bytes;
return (bit_util::ToLittleEndian(word) >> bit_offset) &
bit_util::LeastSignificantBitMask(num_bits);
}
}
void SkipNextZeros() {
assert(current_num_bits_ == 0);
while (ARROW_PREDICT_TRUE(remaining_ >= 64)) {
current_word_ = LoadFullWord();
const auto num_zeros = CountFirstZeros(current_word_);
if (num_zeros < 64) {
// Run of zeros ends here
current_word_ = ConsumeBits(current_word_, num_zeros);
current_num_bits_ = 64 - num_zeros;
remaining_ -= num_zeros;
assert(remaining_ >= 0);
assert(current_num_bits_ >= 0);
return;
}
remaining_ -= 64;
}
// Run of zeros continues in last bitmap word
if (remaining_ > 0) {
current_word_ = LoadPartialWord(/*bit_offset=*/0, remaining_);
current_num_bits_ = static_cast<int32_t>(remaining_);
const auto num_zeros =
std::min<int32_t>(current_num_bits_, CountFirstZeros(current_word_));
current_word_ = ConsumeBits(current_word_, num_zeros);
current_num_bits_ -= num_zeros;
remaining_ -= num_zeros;
assert(remaining_ >= 0);
assert(current_num_bits_ >= 0);
}
}
int64_t CountNextOnes() {
assert(current_word_ & kFirstBit);
int64_t len;
if (~current_word_) {
const auto num_ones = CountFirstZeros(~current_word_);
assert(num_ones <= current_num_bits_);
assert(num_ones <= remaining_);
remaining_ -= num_ones;
current_word_ = ConsumeBits(current_word_, num_ones);
current_num_bits_ -= num_ones;
if (current_num_bits_) {
// Run of ones ends here
return num_ones;
}
len = num_ones;
} else {
// current_word_ is all ones
remaining_ -= 64;
current_num_bits_ = 0;
len = 64;
}
while (ARROW_PREDICT_TRUE(remaining_ >= 64)) {
current_word_ = LoadFullWord();
const auto num_ones = CountFirstZeros(~current_word_);
len += num_ones;
remaining_ -= num_ones;
if (num_ones < 64) {
// Run of ones ends here
current_word_ = ConsumeBits(current_word_, num_ones);
current_num_bits_ = 64 - num_ones;
return len;
}
}
// Run of ones continues in last bitmap word
if (remaining_ > 0) {
current_word_ = LoadPartialWord(/*bit_offset=*/0, remaining_);
current_num_bits_ = static_cast<int32_t>(remaining_);
const auto num_ones = CountFirstZeros(~current_word_);
assert(num_ones <= current_num_bits_);
assert(num_ones <= remaining_);
current_word_ = ConsumeBits(current_word_, num_ones);
current_num_bits_ -= num_ones;
remaining_ -= num_ones;
len += num_ones;
}
return len;
}
SetBitRun FindCurrentRun() {
// Skip any pending zeros
const auto num_zeros = CountFirstZeros(current_word_);
if (num_zeros >= current_num_bits_) {
remaining_ -= current_num_bits_;
current_word_ = 0;
current_num_bits_ = 0;
return {0, 0};
}
assert(num_zeros <= remaining_);
current_word_ = ConsumeBits(current_word_, num_zeros);
current_num_bits_ -= num_zeros;
remaining_ -= num_zeros;
const int64_t pos = position();
// Count any ones
const auto num_ones = CountFirstZeros(~current_word_);
assert(num_ones <= current_num_bits_);
assert(num_ones <= remaining_);
current_word_ = ConsumeBits(current_word_, num_ones);
current_num_bits_ -= num_ones;
remaining_ -= num_ones;
return {pos, num_ones};
}
inline int CountFirstZeros(uint64_t word);
inline uint64_t ConsumeBits(uint64_t word, int32_t num_bits);
const uint8_t* bitmap_;
const int64_t length_;
int64_t remaining_;
uint64_t current_word_;
int32_t current_num_bits_;
static constexpr uint64_t kFirstBit = Reverse ? 0x8000000000000000ULL : 1;
};
template <>
inline int BaseSetBitRunReader<false>::CountFirstZeros(uint64_t word) {
return bit_util::CountTrailingZeros(word);
}
template <>
inline int BaseSetBitRunReader<true>::CountFirstZeros(uint64_t word) {
return bit_util::CountLeadingZeros(word);
}
template <>
inline uint64_t BaseSetBitRunReader<false>::ConsumeBits(uint64_t word, int32_t num_bits) {
return word >> num_bits;
}
template <>
inline uint64_t BaseSetBitRunReader<true>::ConsumeBits(uint64_t word, int32_t num_bits) {
return word << num_bits;
}
using SetBitRunReader = BaseSetBitRunReader</*Reverse=*/false>;
using ReverseSetBitRunReader = BaseSetBitRunReader</*Reverse=*/true>;
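// Usage sketch: iterating over the runs of set bits only, which is convenient
// when null entries can be skipped wholesale. ReverseSetBitRunReader yields the
// same runs starting from the end of the bitmap.
//
//   void ForEachSetRun(const uint8_t* bitmap, int64_t offset, int64_t length) {
//     arrow::internal::SetBitRunReader reader(bitmap, offset, length);
//     while (true) {
//       arrow::internal::SetBitRun run = reader.NextRun();
//       if (run.AtEnd()) break;
//       // bits [run.position, run.position + run.length) are all set
//     }
//   }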
// Functional-style bit run visitors.
// XXX: Try to make this function small so the compiler can inline and optimize
// the `visit` function, which is normally a hot loop with vectorizable code.
// - don't inline SetBitRunReader constructor, it doesn't hurt performance
// - un-inline NextRun hurts 'many null' cases a bit, but improves normal cases
template <typename Visit>
inline Status VisitSetBitRuns(const uint8_t* bitmap, int64_t offset, int64_t length,
Visit&& visit) {
if (bitmap == NULLPTR) {
// Assume all bits set (no validity bitmap, i.e. all values are valid)
return visit(static_cast<int64_t>(0), static_cast<int64_t>(length));
}
SetBitRunReader reader(bitmap, offset, length);
while (true) {
const auto run = reader.NextRun();
if (run.length == 0) {
break;
}
ARROW_RETURN_NOT_OK(visit(run.position, run.length));
}
return Status::OK();
}
template <typename Visit>
inline void VisitSetBitRunsVoid(const uint8_t* bitmap, int64_t offset, int64_t length,
Visit&& visit) {
if (bitmap == NULLPTR) {
// Assume all bits set (no validity bitmap, i.e. all values are valid)
visit(static_cast<int64_t>(0), static_cast<int64_t>(length));
return;
}
SetBitRunReader reader(bitmap, offset, length);
while (true) {
const auto run = reader.NextRun();
if (run.length == 0) {
break;
}
visit(run.position, run.length);
}
}
template <typename Visit>
inline Status VisitSetBitRuns(const std::shared_ptr<Buffer>& bitmap, int64_t offset,
int64_t length, Visit&& visit) {
return VisitSetBitRuns(bitmap ? bitmap->data() : NULLPTR, offset, length,
std::forward<Visit>(visit));
}
template <typename Visit>
inline void VisitSetBitRunsVoid(const std::shared_ptr<Buffer>& bitmap, int64_t offset,
int64_t length, Visit&& visit) {
VisitSetBitRunsVoid(bitmap ? bitmap->data() : NULLPTR, offset, length,
std::forward<Visit>(visit));
}
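// Usage sketch: gathering only the valid ranges of a hypothetical `src` array
// with VisitSetBitRunsVoid. When the bitmap is NULLPTR the visitor is invoked
// once for the whole [0, length) range.
//
//   void CopyValidRanges(const int32_t* src, int32_t* dest,
//                        const uint8_t* validity_bitmap,  // may be NULLPTR
//                        int64_t offset, int64_t length) {
//     arrow::internal::VisitSetBitRunsVoid(
//         validity_bitmap, offset, length,
//         [&](int64_t position, int64_t run_length) {
//           std::memcpy(dest + position, src + position, run_length * sizeof(int32_t));
//         });
//   }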
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,534 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// From Apache Impala (incubating) as of 2016-01-29
#pragma once
#include <string.h>
#include <algorithm>
#include <cstdint>
#include "arrow/util/bit_util.h"
#include "arrow/util/bpacking.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/ubsan.h"
namespace arrow {
namespace bit_util {
/// Utility class to write bit/byte streams. This class can write data either
/// bit packed or byte aligned (and a single stream can contain a mix of both).
/// This class does not allocate memory.
class BitWriter {
public:
/// buffer: buffer to write bits to. Buffer should be preallocated with
/// 'buffer_len' bytes.
BitWriter(uint8_t* buffer, int buffer_len) : buffer_(buffer), max_bytes_(buffer_len) {
Clear();
}
void Clear() {
buffered_values_ = 0;
byte_offset_ = 0;
bit_offset_ = 0;
}
/// The number of bytes written so far, including the current byte (i.e. it may
/// include a fraction of a byte). Includes buffered values.
int bytes_written() const {
return byte_offset_ + static_cast<int>(bit_util::BytesForBits(bit_offset_));
}
uint8_t* buffer() const { return buffer_; }
int buffer_len() const { return max_bytes_; }
/// Writes a value to buffered_values_, flushing to buffer_ if necessary. This is bit
/// packed. Returns false if there was not enough space. num_bits must be <= 64.
bool PutValue(uint64_t v, int num_bits);
/// Writes v to the next aligned byte using num_bytes. If T is larger than
/// num_bytes, the extra high-order bytes will be ignored. Returns false if
/// there was not enough space.
/// The value is written to buffer_ in little-endian format.
template <typename T>
bool PutAligned(T v, int num_bytes);
/// Write a Vlq encoded int to the buffer. Returns false if there was not enough
/// room. The value is written byte aligned.
/// For more details on vlq:
/// en.wikipedia.org/wiki/Variable-length_quantity
bool PutVlqInt(uint32_t v);
// Writes an int zigzag encoded.
bool PutZigZagVlqInt(int32_t v);
/// Write a Vlq encoded int64 to the buffer. Returns false if there was not enough
/// room. The value is written byte aligned.
/// For more details on vlq:
/// en.wikipedia.org/wiki/Variable-length_quantity
bool PutVlqInt(uint64_t v);
// Writes an int64 zigzag encoded.
bool PutZigZagVlqInt(int64_t v);
/// Get a pointer to the next aligned byte and advance the underlying buffer
/// by num_bytes.
/// Returns NULL if there was not enough space.
uint8_t* GetNextBytePtr(int num_bytes = 1);
/// Flushes all buffered values to the buffer. Call this when done writing to
/// the buffer. If 'align' is true, buffered_values_ is reset and any future
/// writes will be written to the next byte boundary.
void Flush(bool align = false);
private:
uint8_t* buffer_;
int max_bytes_;
/// Bit-packed values are initially written to this variable before being memcpy'd to
/// buffer_. This is faster than writing values byte by byte directly to buffer_.
uint64_t buffered_values_;
int byte_offset_; // Offset in buffer_
int bit_offset_; // Offset in buffered_values_
};
/// Utility class to read a bit/byte stream. This class can read bits or bytes
/// that are either byte aligned or not. It also has utilities to read multiple
/// bytes in one read (e.g. encoded int).
class BitReader {
public:
/// 'buffer' is the buffer to read from. The buffer's length is 'buffer_len'.
BitReader(const uint8_t* buffer, int buffer_len)
: buffer_(buffer), max_bytes_(buffer_len), byte_offset_(0), bit_offset_(0) {
int num_bytes = std::min(8, max_bytes_ - byte_offset_);
memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
buffered_values_ = arrow::bit_util::FromLittleEndian(buffered_values_);
}
BitReader()
: buffer_(NULL),
max_bytes_(0),
buffered_values_(0),
byte_offset_(0),
bit_offset_(0) {}
void Reset(const uint8_t* buffer, int buffer_len) {
buffer_ = buffer;
max_bytes_ = buffer_len;
byte_offset_ = 0;
bit_offset_ = 0;
int num_bytes = std::min(8, max_bytes_ - byte_offset_);
memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
buffered_values_ = arrow::bit_util::FromLittleEndian(buffered_values_);
}
/// Gets the next value from the buffer. Returns true if 'v' could be read or false if
/// there are not enough bytes left.
template <typename T>
bool GetValue(int num_bits, T* v);
/// Get a number of values from the buffer. Return the number of values actually read.
template <typename T>
int GetBatch(int num_bits, T* v, int batch_size);
/// Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T
/// needs to be a little-endian native type and big enough to store
/// 'num_bytes'. The value is assumed to be byte-aligned so the stream will
/// be advanced to the start of the next byte before 'v' is read. Returns
/// false if there are not enough bytes left.
/// The value is assumed to be stored in buffer_ in little-endian format.
template <typename T>
bool GetAligned(int num_bytes, T* v);
/// Advances the stream by a number of bits. Returns true if succeed or false if there
/// are not enough bits left.
bool Advance(int64_t num_bits);
/// Reads a vlq encoded int from the stream. The encoded int must start at
/// the beginning of a byte. Return false if there were not enough bytes in
/// the buffer.
bool GetVlqInt(uint32_t* v);
// Reads a zigzag-encoded int into `v`.
bool GetZigZagVlqInt(int32_t* v);
/// Reads a vlq encoded int64 from the stream. The encoded int must start at
/// the beginning of a byte. Return false if there were not enough bytes in
/// the buffer.
bool GetVlqInt(uint64_t* v);
// Reads a zigzag-encoded int64 into `v`.
bool GetZigZagVlqInt(int64_t* v);
/// Returns the number of bytes left in the stream, not including the current
/// byte (i.e., there may be an additional fraction of a byte).
int bytes_left() {
return max_bytes_ -
(byte_offset_ + static_cast<int>(bit_util::BytesForBits(bit_offset_)));
}
/// Maximum byte length of a vlq encoded int
static constexpr int kMaxVlqByteLength = 5;
/// Maximum byte length of a vlq encoded int64
static constexpr int kMaxVlqByteLengthForInt64 = 10;
private:
const uint8_t* buffer_;
int max_bytes_;
/// Bytes are memcpy'd from buffer_ and values are read from this variable. This is
/// faster than reading values byte by byte directly from buffer_.
uint64_t buffered_values_;
int byte_offset_; // Offset in buffer_
int bit_offset_; // Offset in buffered_values_
};
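// Usage sketch: a round trip that bit-packs ten 4-bit values, then appends a
// byte-aligned VLQ integer, and reads everything back. The 64-byte buffer size
// is an arbitrary choice for the example.
//
//   uint8_t buffer[64] = {0};
//   arrow::bit_util::BitWriter writer(buffer, sizeof(buffer));
//   for (uint64_t v = 0; v < 10; ++v) {
//     writer.PutValue(v, /*num_bits=*/4);             // bit packed
//   }
//   writer.Flush(/*align=*/true);                     // move to the next byte boundary
//   writer.PutVlqInt(static_cast<uint32_t>(12345));   // byte aligned
//
//   arrow::bit_util::BitReader reader(buffer, writer.bytes_written());
//   uint32_t unpacked = 0;
//   for (int i = 0; i < 10; ++i) {
//     reader.GetValue(/*num_bits=*/4, &unpacked);     // unpacked == i
//   }
//   uint32_t vlq = 0;
//   reader.GetVlqInt(&vlq);                           // vlq == 12345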
inline bool BitWriter::PutValue(uint64_t v, int num_bits) {
DCHECK_LE(num_bits, 64);
if (num_bits < 64) {
DCHECK_EQ(v >> num_bits, 0) << "v = " << v << ", num_bits = " << num_bits;
}
if (ARROW_PREDICT_FALSE(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8))
return false;
buffered_values_ |= v << bit_offset_;
bit_offset_ += num_bits;
if (ARROW_PREDICT_FALSE(bit_offset_ >= 64)) {
// Flush buffered_values_ and write out bits of v that did not fit
buffered_values_ = arrow::bit_util::ToLittleEndian(buffered_values_);
memcpy(buffer_ + byte_offset_, &buffered_values_, 8);
buffered_values_ = 0;
byte_offset_ += 8;
bit_offset_ -= 64;
buffered_values_ =
(num_bits - bit_offset_ == 64) ? 0 : (v >> (num_bits - bit_offset_));
}
DCHECK_LT(bit_offset_, 64);
return true;
}
inline void BitWriter::Flush(bool align) {
int num_bytes = static_cast<int>(bit_util::BytesForBits(bit_offset_));
DCHECK_LE(byte_offset_ + num_bytes, max_bytes_);
auto buffered_values = arrow::bit_util::ToLittleEndian(buffered_values_);
memcpy(buffer_ + byte_offset_, &buffered_values, num_bytes);
if (align) {
buffered_values_ = 0;
byte_offset_ += num_bytes;
bit_offset_ = 0;
}
}
inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) {
Flush(/* align */ true);
DCHECK_LE(byte_offset_, max_bytes_);
if (byte_offset_ + num_bytes > max_bytes_) return NULL;
uint8_t* ptr = buffer_ + byte_offset_;
byte_offset_ += num_bytes;
return ptr;
}
template <typename T>
inline bool BitWriter::PutAligned(T val, int num_bytes) {
uint8_t* ptr = GetNextBytePtr(num_bytes);
if (ptr == NULL) return false;
val = arrow::bit_util::ToLittleEndian(val);
memcpy(ptr, &val, num_bytes);
return true;
}
namespace detail {
inline void ResetBufferedValues_(const uint8_t* buffer, int byte_offset,
int bytes_remaining, uint64_t* buffered_values) {
if (ARROW_PREDICT_TRUE(bytes_remaining >= 8)) {
memcpy(buffered_values, buffer + byte_offset, 8);
} else {
memcpy(buffered_values, buffer + byte_offset, bytes_remaining);
}
*buffered_values = arrow::bit_util::FromLittleEndian(*buffered_values);
}
template <typename T>
inline void GetValue_(int num_bits, T* v, int max_bytes, const uint8_t* buffer,
int* bit_offset, int* byte_offset, uint64_t* buffered_values) {
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4800)
#endif
*v = static_cast<T>(bit_util::TrailingBits(*buffered_values, *bit_offset + num_bits) >>
*bit_offset);
#ifdef _MSC_VER
#pragma warning(pop)
#endif
*bit_offset += num_bits;
if (*bit_offset >= 64) {
*byte_offset += 8;
*bit_offset -= 64;
ResetBufferedValues_(buffer, *byte_offset, max_bytes - *byte_offset, buffered_values);
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4800 4805)
#endif
// Read bits of v that crossed into new buffered_values_
if (ARROW_PREDICT_TRUE(num_bits - *bit_offset < static_cast<int>(8 * sizeof(T)))) {
// If the shift exponent (num_bits - *bit_offset) were not less than the bit
// width of T, *v would already be complete and the shift below would be a
// runtime error (shift exponent too large), hence the guard above.
*v = *v | static_cast<T>(bit_util::TrailingBits(*buffered_values, *bit_offset)
<< (num_bits - *bit_offset));
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
DCHECK_LE(*bit_offset, 64);
}
}
} // namespace detail
template <typename T>
inline bool BitReader::GetValue(int num_bits, T* v) {
return GetBatch(num_bits, v, 1) == 1;
}
template <typename T>
inline int BitReader::GetBatch(int num_bits, T* v, int batch_size) {
DCHECK(buffer_ != NULL);
DCHECK_LE(num_bits, static_cast<int>(sizeof(T) * 8));
int bit_offset = bit_offset_;
int byte_offset = byte_offset_;
uint64_t buffered_values = buffered_values_;
int max_bytes = max_bytes_;
const uint8_t* buffer = buffer_;
const int64_t needed_bits = num_bits * static_cast<int64_t>(batch_size);
constexpr uint64_t kBitsPerByte = 8;
const int64_t remaining_bits =
static_cast<int64_t>(max_bytes - byte_offset) * kBitsPerByte - bit_offset;
if (remaining_bits < needed_bits) {
batch_size = static_cast<int>(remaining_bits / num_bits);
}
int i = 0;
if (ARROW_PREDICT_FALSE(bit_offset != 0)) {
for (; i < batch_size && bit_offset != 0; ++i) {
detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset,
&buffered_values);
}
}
if (sizeof(T) == 4) {
int num_unpacked =
internal::unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset),
reinterpret_cast<uint32_t*>(v + i), batch_size - i, num_bits);
i += num_unpacked;
byte_offset += num_unpacked * num_bits / 8;
} else if (sizeof(T) == 8 && num_bits > 32) {
// Use unpack64 only if num_bits is larger than 32
// TODO (ARROW-13677): improve the performance of internal::unpack64
// and remove the restriction of num_bits
int num_unpacked =
internal::unpack64(buffer + byte_offset, reinterpret_cast<uint64_t*>(v + i),
batch_size - i, num_bits);
i += num_unpacked;
byte_offset += num_unpacked * num_bits / 8;
} else {
// TODO: revisit this limit if necessary
DCHECK_LE(num_bits, 32);
const int buffer_size = 1024;
uint32_t unpack_buffer[buffer_size];
while (i < batch_size) {
int unpack_size = std::min(buffer_size, batch_size - i);
int num_unpacked =
internal::unpack32(reinterpret_cast<const uint32_t*>(buffer + byte_offset),
unpack_buffer, unpack_size, num_bits);
if (num_unpacked == 0) {
break;
}
for (int k = 0; k < num_unpacked; ++k) {
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4800)
#endif
v[i + k] = static_cast<T>(unpack_buffer[k]);
#ifdef _MSC_VER
#pragma warning(pop)
#endif
}
i += num_unpacked;
byte_offset += num_unpacked * num_bits / 8;
}
}
detail::ResetBufferedValues_(buffer, byte_offset, max_bytes - byte_offset,
&buffered_values);
for (; i < batch_size; ++i) {
detail::GetValue_(num_bits, &v[i], max_bytes, buffer, &bit_offset, &byte_offset,
&buffered_values);
}
bit_offset_ = bit_offset;
byte_offset_ = byte_offset;
buffered_values_ = buffered_values;
return batch_size;
}
template <typename T>
inline bool BitReader::GetAligned(int num_bytes, T* v) {
if (ARROW_PREDICT_FALSE(num_bytes > static_cast<int>(sizeof(T)))) {
return false;
}
int bytes_read = static_cast<int>(bit_util::BytesForBits(bit_offset_));
if (ARROW_PREDICT_FALSE(byte_offset_ + bytes_read + num_bytes > max_bytes_)) {
return false;
}
// Advance byte_offset to next unread byte and read num_bytes
byte_offset_ += bytes_read;
if constexpr (std::is_same_v<T, bool>) {
// ARROW-18031: if we're trying to get an aligned bool, just check
// the LSB of the next byte and move on. If we memcpy + FromLittleEndian
// as usual, we have potential undefined behavior for bools if the value
// isn't 0 or 1
*v = *(buffer_ + byte_offset_) & 1;
} else {
memcpy(v, buffer_ + byte_offset_, num_bytes);
*v = arrow::bit_util::FromLittleEndian(*v);
}
byte_offset_ += num_bytes;
bit_offset_ = 0;
detail::ResetBufferedValues_(buffer_, byte_offset_, max_bytes_ - byte_offset_,
&buffered_values_);
return true;
}
inline bool BitReader::Advance(int64_t num_bits) {
int64_t bits_required = bit_offset_ + num_bits;
int64_t bytes_required = bit_util::BytesForBits(bits_required);
if (ARROW_PREDICT_FALSE(bytes_required > max_bytes_ - byte_offset_)) {
return false;
}
byte_offset_ += static_cast<int>(bits_required >> 3);
bit_offset_ = static_cast<int>(bits_required & 7);
detail::ResetBufferedValues_(buffer_, byte_offset_, max_bytes_ - byte_offset_,
&buffered_values_);
return true;
}
inline bool BitWriter::PutVlqInt(uint32_t v) {
bool result = true;
while ((v & 0xFFFFFF80UL) != 0UL) {
result &= PutAligned<uint8_t>(static_cast<uint8_t>((v & 0x7F) | 0x80), 1);
v >>= 7;
}
result &= PutAligned<uint8_t>(static_cast<uint8_t>(v & 0x7F), 1);
return result;
}
inline bool BitReader::GetVlqInt(uint32_t* v) {
uint32_t tmp = 0;
for (int i = 0; i < kMaxVlqByteLength; i++) {
uint8_t byte = 0;
if (ARROW_PREDICT_FALSE(!GetAligned<uint8_t>(1, &byte))) {
return false;
}
tmp |= static_cast<uint32_t>(byte & 0x7F) << (7 * i);
if ((byte & 0x80) == 0) {
*v = tmp;
return true;
}
}
return false;
}
inline bool BitWriter::PutZigZagVlqInt(int32_t v) {
uint32_t u_v = ::arrow::util::SafeCopy<uint32_t>(v);
u_v = (u_v << 1) ^ static_cast<uint32_t>(v >> 31);
return PutVlqInt(u_v);
}
inline bool BitReader::GetZigZagVlqInt(int32_t* v) {
uint32_t u;
if (!GetVlqInt(&u)) return false;
u = (u >> 1) ^ (~(u & 1) + 1);
*v = ::arrow::util::SafeCopy<int32_t>(u);
return true;
}
inline bool BitWriter::PutVlqInt(uint64_t v) {
bool result = true;
while ((v & 0xFFFFFFFFFFFFFF80ULL) != 0ULL) {
result &= PutAligned<uint8_t>(static_cast<uint8_t>((v & 0x7F) | 0x80), 1);
v >>= 7;
}
result &= PutAligned<uint8_t>(static_cast<uint8_t>(v & 0x7F), 1);
return result;
}
inline bool BitReader::GetVlqInt(uint64_t* v) {
uint64_t tmp = 0;
for (int i = 0; i < kMaxVlqByteLengthForInt64; i++) {
uint8_t byte = 0;
if (ARROW_PREDICT_FALSE(!GetAligned<uint8_t>(1, &byte))) {
return false;
}
tmp |= static_cast<uint64_t>(byte & 0x7F) << (7 * i);
if ((byte & 0x80) == 0) {
*v = tmp;
return true;
}
}
return false;
}
inline bool BitWriter::PutZigZagVlqInt(int64_t v) {
uint64_t u_v = ::arrow::util::SafeCopy<uint64_t>(v);
u_v = (u_v << 1) ^ static_cast<uint64_t>(v >> 63);
return PutVlqInt(u_v);
}
inline bool BitReader::GetZigZagVlqInt(int64_t* v) {
uint64_t u;
if (!GetVlqInt(&u)) return false;
u = (u >> 1) ^ (~(u & 1) + 1);
*v = ::arrow::util::SafeCopy<int64_t>(u);
return true;
}
} // namespace bit_util
} // namespace arrow

View File

@@ -0,0 +1,367 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#if defined(_MSC_VER)
#if defined(_M_AMD64) || defined(_M_X64)
#include <intrin.h> // IWYU pragma: keep
#include <nmmintrin.h>
#endif
#pragma intrinsic(_BitScanReverse)
#pragma intrinsic(_BitScanForward)
#define ARROW_POPCOUNT64 __popcnt64
#define ARROW_POPCOUNT32 __popcnt
#else
#define ARROW_POPCOUNT64 __builtin_popcountll
#define ARROW_POPCOUNT32 __builtin_popcount
#endif
#include <cstdint>
#include <type_traits>
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace detail {
template <typename Integer>
typename std::make_unsigned<Integer>::type as_unsigned(Integer x) {
return static_cast<typename std::make_unsigned<Integer>::type>(x);
}
} // namespace detail
namespace bit_util {
// The number of set bits in a given unsigned byte value, pre-computed
//
// Generated with the following Python code
// output = 'static constexpr uint8_t kBytePopcount[] = {{{0}}};'
// popcounts = [str(bin(i).count('1')) for i in range(0, 256)]
// print(output.format(', '.join(popcounts)))
static constexpr uint8_t kBytePopcount[] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3,
4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4,
4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4,
5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5,
4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2,
3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5,
5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4,
5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
static inline uint64_t PopCount(uint64_t bitmap) { return ARROW_POPCOUNT64(bitmap); }
static inline uint32_t PopCount(uint32_t bitmap) { return ARROW_POPCOUNT32(bitmap); }
//
// Bit-related computations on integer values
//
// Returns the ceil of value/divisor
constexpr int64_t CeilDiv(int64_t value, int64_t divisor) {
return (value == 0) ? 0 : 1 + (value - 1) / divisor;
}
// Return the number of bytes needed to fit the given number of bits
constexpr int64_t BytesForBits(int64_t bits) {
// This formula avoids integer overflow on very large `bits`
return (bits >> 3) + ((bits & 7) != 0);
}
constexpr bool IsPowerOf2(int64_t value) {
return value > 0 && (value & (value - 1)) == 0;
}
constexpr bool IsPowerOf2(uint64_t value) {
return value > 0 && (value & (value - 1)) == 0;
}
// Returns the smallest power of two greater than or equal to n. If n is already
// a power of two, it is returned as is.
static inline int64_t NextPower2(int64_t n) {
// Taken from
// http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
n--;
n |= n >> 1;
n |= n >> 2;
n |= n >> 4;
n |= n >> 8;
n |= n >> 16;
n |= n >> 32;
n++;
return n;
}
constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; }
constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; }
// Returns a mask for the bit_index lower order bits.
// Only valid for bit_index in the range [0, 64).
constexpr uint64_t LeastSignificantBitMask(int64_t bit_index) {
return (static_cast<uint64_t>(1) << bit_index) - 1;
}
// Returns 'value' rounded up to the nearest multiple of 'factor'
constexpr int64_t RoundUp(int64_t value, int64_t factor) {
return CeilDiv(value, factor) * factor;
}
// Returns 'value' rounded down to the nearest multiple of 'factor'
constexpr int64_t RoundDown(int64_t value, int64_t factor) {
return (value / factor) * factor;
}
// Returns 'value' rounded up to the nearest multiple of 'factor' when factor
// is a power of two.
// The result is undefined on overflow, i.e. if `value > 2**64 - factor`,
// since we cannot return the correct result which would be 2**64.
constexpr int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) {
// DCHECK(value >= 0);
// DCHECK(IsPowerOf2(factor));
return (value + (factor - 1)) & ~(factor - 1);
}
constexpr uint64_t RoundUpToPowerOf2(uint64_t value, uint64_t factor) {
// DCHECK(IsPowerOf2(factor));
return (value + (factor - 1)) & ~(factor - 1);
}
constexpr int64_t RoundUpToMultipleOf8(int64_t num) { return RoundUpToPowerOf2(num, 8); }
constexpr int64_t RoundUpToMultipleOf64(int64_t num) {
return RoundUpToPowerOf2(num, 64);
}
// Returns the number of bytes covering a sliced bitmap. Find the length
// rounded to cover full bytes on both extremities.
//
// The following example represents a slice (offset=10, length=9)
//
// 0 8 16 24
// |-------|-------|------|
// [ ] (slice)
// [ ] (same slice aligned to bytes bounds, length=16)
//
// The covering bytes is the length (in bytes) of this new aligned slice.
constexpr int64_t CoveringBytes(int64_t offset, int64_t length) {
return (bit_util::RoundUp(length + offset, 8) - bit_util::RoundDown(offset, 8)) / 8;
}
// Returns the 'num_bits' least-significant bits of 'v'.
static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
if (ARROW_PREDICT_FALSE(num_bits == 0)) return 0;
if (ARROW_PREDICT_FALSE(num_bits >= 64)) return v;
int n = 64 - num_bits;
return (v << n) >> n;
}
/// \brief Count the number of leading zeros in an unsigned integer.
static inline int CountLeadingZeros(uint32_t value) {
#if defined(__clang__) || defined(__GNUC__)
if (value == 0) return 32;
return static_cast<int>(__builtin_clz(value));
#elif defined(_MSC_VER)
unsigned long index; // NOLINT
if (_BitScanReverse(&index, static_cast<unsigned long>(value))) { // NOLINT
return 31 - static_cast<int>(index);
} else {
return 32;
}
#else
int bitpos = 0;
while (value != 0) {
value >>= 1;
++bitpos;
}
return 32 - bitpos;
#endif
}
static inline int CountLeadingZeros(uint64_t value) {
#if defined(__clang__) || defined(__GNUC__)
if (value == 0) return 64;
return static_cast<int>(__builtin_clzll(value));
#elif defined(_MSC_VER)
unsigned long index; // NOLINT
if (_BitScanReverse64(&index, value)) { // NOLINT
return 63 - static_cast<int>(index);
} else {
return 64;
}
#else
int bitpos = 0;
while (value != 0) {
value >>= 1;
++bitpos;
}
return 64 - bitpos;
#endif
}
static inline int CountTrailingZeros(uint32_t value) {
#if defined(__clang__) || defined(__GNUC__)
if (value == 0) return 32;
return static_cast<int>(__builtin_ctzl(value));
#elif defined(_MSC_VER)
unsigned long index; // NOLINT
if (_BitScanForward(&index, value)) {
return static_cast<int>(index);
} else {
return 32;
}
#else
int bitpos = 0;
if (value) {
while ((value & 1) == 0) {
value >>= 1;
++bitpos;
}
} else {
bitpos = 32;
}
return bitpos;
#endif
}
static inline int CountTrailingZeros(uint64_t value) {
#if defined(__clang__) || defined(__GNUC__)
if (value == 0) return 64;
return static_cast<int>(__builtin_ctzll(value));
#elif defined(_MSC_VER)
unsigned long index; // NOLINT
if (_BitScanForward64(&index, value)) {
return static_cast<int>(index);
} else {
return 64;
}
#else
int bitpos = 0;
if (value) {
while ((value & 1) == 0) {
value >>= 1;
++bitpos;
}
} else {
bitpos = 64;
}
return bitpos;
#endif
}
// Returns the minimum number of bits needed to represent an unsigned value
static inline int NumRequiredBits(uint64_t x) { return 64 - CountLeadingZeros(x); }
// Returns ceil(log2(x)).
static inline int Log2(uint64_t x) {
// DCHECK_GT(x, 0);
return NumRequiredBits(x - 1);
}
//
// Utilities for reading and writing individual bits by their index
// in a memory area.
//
// Bitmask selecting the k-th bit in a byte
static constexpr uint8_t kBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128};
// the bitwise complement version of kBitmask
static constexpr uint8_t kFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127};
// Bitmask selecting the (k - 1) preceding bits in a byte
static constexpr uint8_t kPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127};
static constexpr uint8_t kPrecedingWrappingBitmask[] = {255, 1, 3, 7, 15, 31, 63, 127};
// the bitwise complement version of kPrecedingBitmask
static constexpr uint8_t kTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128};
static constexpr bool GetBit(const uint8_t* bits, uint64_t i) {
return (bits[i >> 3] >> (i & 0x07)) & 1;
}
// Gets the i-th bit from a byte. Should only be used with i <= 7.
static constexpr bool GetBitFromByte(uint8_t byte, uint8_t i) {
return byte & kBitmask[i];
}
static inline void ClearBit(uint8_t* bits, int64_t i) {
bits[i / 8] &= kFlippedBitmask[i % 8];
}
static inline void SetBit(uint8_t* bits, int64_t i) { bits[i / 8] |= kBitmask[i % 8]; }
static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) {
// https://graphics.stanford.edu/~seander/bithacks.html
// "Conditionally set or clear bits without branching"
// NOTE: this seems to confuse Valgrind as it reads from potentially
// uninitialized memory
bits[i / 8] ^= static_cast<uint8_t>(-static_cast<uint8_t>(bit_is_set) ^ bits[i / 8]) &
kBitmask[i % 8];
}
/// \brief set or clear a range of bits quickly
ARROW_EXPORT
void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set);
/// \brief Sets all bits in the bitmap to true
ARROW_EXPORT
void SetBitmap(uint8_t* data, int64_t offset, int64_t length);
/// \brief Clears all bits in the bitmap (set to false)
ARROW_EXPORT
void ClearBitmap(uint8_t* data, int64_t offset, int64_t length);
/// Returns a mask with the lower i bits set to 1. If i >= sizeof(Word)*8, all ones
/// will be returned.
/// e.g. PrecedingWordBitmask<uint8_t>(4) == 0x0f (see the static_asserts below)
/// ref: https://stackoverflow.com/a/59523400
template <typename Word>
constexpr Word PrecedingWordBitmask(unsigned int const i) {
return (static_cast<Word>(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 - 1))) - 1;
}
static_assert(PrecedingWordBitmask<uint8_t>(0) == 0x00, "");
static_assert(PrecedingWordBitmask<uint8_t>(4) == 0x0f, "");
static_assert(PrecedingWordBitmask<uint8_t>(8) == 0xff, "");
static_assert(PrecedingWordBitmask<uint16_t>(8) == 0x00ff, "");
/// \brief Create a word with the low `n` bits taken from `low` and the high
/// `sizeof(Word)*8 - n` bits taken from `high`.
/// Word ret
/// for (i = 0; i < sizeof(Word)*8; i++){
/// ret[i]= i < n ? low[i]: high[i];
/// }
template <typename Word>
constexpr Word SpliceWord(int n, Word low, Word high) {
return (high & ~PrecedingWordBitmask<Word>(n)) | (low & PrecedingWordBitmask<Word>(n));
}
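// Worked example (illustrative values): with n = 4, the low nibble comes from
// `low` and the high nibble from `high`:
//   SpliceWord<uint8_t>(4, 0x0F, 0xF0) == 0xFF
//   SpliceWord<uint8_t>(4, 0x00, 0xF0) == 0xF0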
/// \brief Pack integers into a bitmap in batches of 8
template <int batch_size>
void PackBits(const uint32_t* values, uint8_t* out) {
for (int i = 0; i < batch_size / 8; ++i) {
*out++ = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 |
values[4] << 4 | values[5] << 5 | values[6] << 6 | values[7] << 7);
values += 8;
}
}
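// Worked example (illustrative values): PackBits packs one output byte per 8
// input values, least-significant bit first.
//
//   const uint32_t values[8] = {1, 0, 1, 0, 0, 0, 0, 0};
//   uint8_t out[1];
//   arrow::bit_util::PackBits<8>(values, out);  // out[0] == 0b00000101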
} // namespace bit_util
} // namespace arrow

View File

@@ -0,0 +1,469 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
#include "arrow/buffer.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/bitmap_reader.h"
#include "arrow/util/bitmap_writer.h"
#include "arrow/util/bytes_view.h"
#include "arrow/util/compare.h"
#include "arrow/util/endian.h"
#include "arrow/util/functional.h"
#include "arrow/util/string_builder.h"
#include "arrow/util/visibility.h"
namespace arrow {
class BooleanArray;
namespace internal {
class ARROW_EXPORT Bitmap : public util::ToStringOstreamable<Bitmap>,
public util::EqualityComparable<Bitmap> {
public:
template <typename Word>
using View = std::basic_string_view<Word>;
Bitmap() = default;
Bitmap(const std::shared_ptr<Buffer>& buffer, int64_t offset, int64_t length)
: data_(buffer->data()), offset_(offset), length_(length) {
if (buffer->is_mutable()) {
mutable_data_ = buffer->mutable_data();
}
}
Bitmap(const void* data, int64_t offset, int64_t length)
: data_(reinterpret_cast<const uint8_t*>(data)), offset_(offset), length_(length) {}
Bitmap(void* data, int64_t offset, int64_t length)
: data_(reinterpret_cast<const uint8_t*>(data)),
mutable_data_(reinterpret_cast<uint8_t*>(data)),
offset_(offset),
length_(length) {}
Bitmap Slice(int64_t offset) const {
if (mutable_data_ != NULLPTR) {
return Bitmap(mutable_data_, offset_ + offset, length_ - offset);
} else {
return Bitmap(data_, offset_ + offset, length_ - offset);
}
}
Bitmap Slice(int64_t offset, int64_t length) const {
if (mutable_data_ != NULLPTR) {
return Bitmap(mutable_data_, offset_ + offset, length);
} else {
return Bitmap(data_, offset_ + offset, length);
}
}
std::string ToString() const;
bool Equals(const Bitmap& other) const;
std::string Diff(const Bitmap& other) const;
bool GetBit(int64_t i) const { return bit_util::GetBit(data_, i + offset_); }
bool operator[](int64_t i) const { return GetBit(i); }
void SetBitTo(int64_t i, bool v) const {
bit_util::SetBitTo(mutable_data_, i + offset_, v);
}
void SetBitsTo(bool v) { bit_util::SetBitsTo(mutable_data_, offset_, length_, v); }
void CopyFrom(const Bitmap& other);
void CopyFromInverted(const Bitmap& other);
/// \brief Visit bits from each bitmap as bitset<N>
///
/// All bitmaps must have identical length.
template <size_t N, typename Visitor>
static void VisitBits(const Bitmap (&bitmaps)[N], Visitor&& visitor) {
int64_t bit_length = BitLength(bitmaps, N);
std::bitset<N> bits;
for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) {
for (size_t i = 0; i < N; ++i) {
bits[i] = bitmaps[i].GetBit(bit_i);
}
visitor(bits);
}
}
/// \brief Visit bits from each bitmap as bitset<N>
///
/// All bitmaps must have identical length.
template <size_t N, typename Visitor>
static void VisitBits(const std::array<Bitmap, N>& bitmaps, Visitor&& visitor) {
int64_t bit_length = BitLength(bitmaps);
std::bitset<N> bits;
for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) {
for (size_t i = 0; i < N; ++i) {
bits[i] = bitmaps[i].GetBit(bit_i);
}
visitor(bits);
}
}
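// Usage sketch: visiting two bitmaps in lockstep and counting the positions
// where both bits are set. The `left_data`, `right_data` and `length` names are
// hypothetical caller-provided inputs.
//
//   arrow::internal::Bitmap maps[2] = {
//       arrow::internal::Bitmap(left_data, /*offset=*/0, length),
//       arrow::internal::Bitmap(right_data, /*offset=*/0, length)};
//   int64_t both_set = 0;
//   arrow::internal::Bitmap::VisitBits(maps, [&](const std::bitset<2>& bits) {
//     both_set += static_cast<int64_t>(bits[0] && bits[1]);
//   });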
/// \brief Visit words of bits from each bitmap as array<Word, N>
///
/// All bitmaps must have identical length. The first bit in a visited bitmap
/// may be offset within the first visited word, but words will otherwise contain
/// densely packed bits loaded from the bitmap. That offset within the first word is
/// returned.
///
/// TODO(bkietz) allow for early termination
// NOTE: this function is efficient on 3+ sufficiently large bitmaps.
// It also has a large prolog / epilog overhead and should be used
// carefully in other cases.
// For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
// and BitmapUInt64Reader.
template <size_t N, typename Visitor,
typename Word = typename std::decay<
internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
static int64_t VisitWords(const Bitmap (&bitmaps_arg)[N], Visitor&& visitor) {
constexpr int64_t kBitWidth = sizeof(Word) * 8;
// local, mutable variables which will be sliced/decremented to represent consumption:
Bitmap bitmaps[N];
int64_t offsets[N];
int64_t bit_length = BitLength(bitmaps_arg, N);
View<Word> words[N];
for (size_t i = 0; i < N; ++i) {
bitmaps[i] = bitmaps_arg[i];
offsets[i] = bitmaps[i].template word_offset<Word>();
assert(offsets[i] >= 0 && offsets[i] < kBitWidth);
words[i] = bitmaps[i].template words<Word>();
}
auto consume = [&](int64_t consumed_bits) {
for (size_t i = 0; i < N; ++i) {
bitmaps[i] = bitmaps[i].Slice(consumed_bits, bit_length - consumed_bits);
offsets[i] = bitmaps[i].template word_offset<Word>();
assert(offsets[i] >= 0 && offsets[i] < kBitWidth);
words[i] = bitmaps[i].template words<Word>();
}
bit_length -= consumed_bits;
};
std::array<Word, N> visited_words;
visited_words.fill(0);
if (bit_length <= kBitWidth * 2) {
// bitmaps fit into one or two words so don't bother with optimization
while (bit_length > 0) {
auto leading_bits = std::min(bit_length, kBitWidth);
SafeLoadWords(bitmaps, 0, leading_bits, false, &visited_words);
visitor(visited_words);
consume(leading_bits);
}
return 0;
}
int64_t max_offset = *std::max_element(offsets, offsets + N);
int64_t min_offset = *std::min_element(offsets, offsets + N);
if (max_offset > 0) {
// consume leading bits
auto leading_bits = kBitWidth - min_offset;
SafeLoadWords(bitmaps, 0, leading_bits, true, &visited_words);
visitor(visited_words);
consume(leading_bits);
}
assert(*std::min_element(offsets, offsets + N) == 0);
int64_t whole_word_count = bit_length / kBitWidth;
assert(whole_word_count >= 1);
if (min_offset == max_offset) {
// all offsets were identical, all leading bits have been consumed
assert(
std::all_of(offsets, offsets + N, [](int64_t offset) { return offset == 0; }));
for (int64_t word_i = 0; word_i < whole_word_count; ++word_i) {
for (size_t i = 0; i < N; ++i) {
visited_words[i] = words[i][word_i];
}
visitor(visited_words);
}
consume(whole_word_count * kBitWidth);
} else {
// leading bits from potentially incomplete words have been consumed
      // word_i such that words[i][word_i] and words[i][word_i + 1] lie entirely
// within the bitmap for all i
for (int64_t word_i = 0; word_i < whole_word_count - 1; ++word_i) {
for (size_t i = 0; i < N; ++i) {
if (offsets[i] == 0) {
visited_words[i] = words[i][word_i];
} else {
auto words0 = bit_util::ToLittleEndian(words[i][word_i]);
auto words1 = bit_util::ToLittleEndian(words[i][word_i + 1]);
visited_words[i] = bit_util::FromLittleEndian(
(words0 >> offsets[i]) | (words1 << (kBitWidth - offsets[i])));
}
}
visitor(visited_words);
}
consume((whole_word_count - 1) * kBitWidth);
SafeLoadWords(bitmaps, 0, kBitWidth, false, &visited_words);
visitor(visited_words);
consume(kBitWidth);
}
// load remaining bits
if (bit_length > 0) {
SafeLoadWords(bitmaps, 0, bit_length, false, &visited_words);
visitor(visited_words);
}
return min_offset;
}
template <size_t N, size_t M, typename ReaderT, typename WriterT, typename Visitor,
typename Word = typename std::decay<
internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
static void RunVisitWordsAndWriteLoop(int64_t bit_length,
std::array<ReaderT, N>& readers,
std::array<WriterT, M>& writers,
Visitor&& visitor) {
constexpr int64_t kBitWidth = sizeof(Word) * 8;
std::array<Word, N> visited_words;
std::array<Word, M> output_words;
    // every reader will have the same number of words, since they have the same length
// TODO($JIRA) this will be inefficient in some cases. When there are offsets beyond
// Word boundary, every Word would have to be created from 2 adjoining Words
auto n_words = readers[0].words();
bit_length -= n_words * kBitWidth;
while (n_words--) {
// first collect all words to visited_words array
for (size_t i = 0; i < N; i++) {
visited_words[i] = readers[i].NextWord();
}
visitor(visited_words, &output_words);
for (size_t i = 0; i < M; i++) {
writers[i].PutNextWord(output_words[i]);
}
}
    // every reader will have the same number of trailing bytes, for the same reason
    // the trailing portion could be more than one word! (ref: BitmapWordReader constructor)
    // remaining full/partial words to write
if (bit_length) {
// convert the word visitor lambda to a byte_visitor
auto byte_visitor = [&](const std::array<uint8_t, N>& in,
std::array<uint8_t, M>* out) {
std::array<Word, N> in_words;
std::array<Word, M> out_words;
std::copy(in.begin(), in.end(), in_words.begin());
visitor(in_words, &out_words);
for (size_t i = 0; i < M; i++) {
out->at(i) = static_cast<uint8_t>(out_words[i]);
}
};
std::array<uint8_t, N> visited_bytes;
std::array<uint8_t, M> output_bytes;
int n_bytes = readers[0].trailing_bytes();
while (n_bytes--) {
visited_bytes.fill(0);
output_bytes.fill(0);
int valid_bits;
for (size_t i = 0; i < N; i++) {
visited_bytes[i] = readers[i].NextTrailingByte(valid_bits);
}
byte_visitor(visited_bytes, &output_bytes);
for (size_t i = 0; i < M; i++) {
writers[i].PutNextTrailingByte(output_bytes[i], valid_bits);
}
}
}
}
  /// \brief Visit words of bits from each input bitmap as array<Word, N> and collect
  /// the outputs into an array<Word, M>, to be written into the output bitmaps accordingly.
///
/// All bitmaps must have identical length. The first bit in a visited bitmap
/// may be offset within the first visited word, but words will otherwise contain
  /// densely packed bits loaded from the bitmap.
/// Visitor is expected to have the following signature
/// [](const std::array<Word, N>& in_words, std::array<Word, M>* out_words){...}
///
// NOTE: this function is efficient on 3+ sufficiently large bitmaps.
// It also has a large prolog / epilog overhead and should be used
// carefully in other cases.
// For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid
// and BitmapUInt64Reader.
template <size_t N, size_t M, typename Visitor,
typename Word = typename std::decay<
internal::call_traits::argument_type<0, Visitor&&>>::type::value_type>
static void VisitWordsAndWrite(const std::array<Bitmap, N>& bitmaps_arg,
std::array<Bitmap, M>* out_bitmaps_arg,
Visitor&& visitor) {
int64_t bit_length = BitLength(bitmaps_arg);
assert(bit_length == BitLength(*out_bitmaps_arg));
// if both input and output bitmaps have no byte offset, then use special template
if (std::all_of(bitmaps_arg.begin(), bitmaps_arg.end(),
[](const Bitmap& b) { return b.offset_ % 8 == 0; }) &&
std::all_of(out_bitmaps_arg->begin(), out_bitmaps_arg->end(),
[](const Bitmap& b) { return b.offset_ % 8 == 0; })) {
std::array<BitmapWordReader<Word, /*may_have_byte_offset=*/false>, N> readers;
for (size_t i = 0; i < N; ++i) {
const Bitmap& in_bitmap = bitmaps_arg[i];
readers[i] = BitmapWordReader<Word, /*may_have_byte_offset=*/false>(
in_bitmap.data_, in_bitmap.offset_, in_bitmap.length_);
}
std::array<BitmapWordWriter<Word, /*may_have_byte_offset=*/false>, M> writers;
for (size_t i = 0; i < M; ++i) {
const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
writers[i] = BitmapWordWriter<Word, /*may_have_byte_offset=*/false>(
out_bitmap.mutable_data_, out_bitmap.offset_, out_bitmap.length_);
}
RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor);
} else {
std::array<BitmapWordReader<Word>, N> readers;
for (size_t i = 0; i < N; ++i) {
const Bitmap& in_bitmap = bitmaps_arg[i];
readers[i] =
BitmapWordReader<Word>(in_bitmap.data_, in_bitmap.offset_, in_bitmap.length_);
}
std::array<BitmapWordWriter<Word>, M> writers;
for (size_t i = 0; i < M; ++i) {
const Bitmap& out_bitmap = out_bitmaps_arg->at(i);
writers[i] = BitmapWordWriter<Word>(out_bitmap.mutable_data_, out_bitmap.offset_,
out_bitmap.length_);
}
RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor);
}
}
const uint8_t* data() const { return data_; }
uint8_t* mutable_data() { return mutable_data_; }
/// offset of first bit relative to buffer().data()
int64_t offset() const { return offset_; }
/// number of bits in this Bitmap
int64_t length() const { return length_; }
/// string_view of all bytes which contain any bit in this Bitmap
util::bytes_view bytes() const {
auto byte_offset = offset_ / 8;
auto byte_count = bit_util::CeilDiv(offset_ + length_, 8) - byte_offset;
return util::bytes_view(data_ + byte_offset, byte_count);
}
private:
/// string_view of all Words which contain any bit in this Bitmap
///
/// For example, given Word=uint16_t and a bitmap spanning bits [20, 36)
/// words() would span bits [16, 48).
///
/// 0 16 32 48 64
/// |-------|-------|------|------| (buffer)
/// [ ] (bitmap)
/// |-------|------| (returned words)
///
/// \warning The words may contain bytes which lie outside the buffer or are
/// uninitialized.
template <typename Word>
View<Word> words() const {
auto bytes_addr = reinterpret_cast<intptr_t>(bytes().data());
auto words_addr = bytes_addr - bytes_addr % sizeof(Word);
auto word_byte_count =
bit_util::RoundUpToPowerOf2(static_cast<int64_t>(bytes_addr + bytes().size()),
static_cast<int64_t>(sizeof(Word))) -
words_addr;
return View<Word>(reinterpret_cast<const Word*>(words_addr),
word_byte_count / sizeof(Word));
}
/// offset of first bit relative to words<Word>().data()
template <typename Word>
int64_t word_offset() const {
return offset_ + 8 * (reinterpret_cast<intptr_t>(data_) -
reinterpret_cast<intptr_t>(words<Word>().data()));
}
/// load words from bitmaps bitwise
template <size_t N, typename Word>
static void SafeLoadWords(const Bitmap (&bitmaps)[N], int64_t offset,
int64_t out_length, bool set_trailing_bits,
std::array<Word, N>* out) {
out->fill(0);
int64_t out_offset = set_trailing_bits ? sizeof(Word) * 8 - out_length : 0;
Bitmap slices[N], out_bitmaps[N];
for (size_t i = 0; i < N; ++i) {
slices[i] = bitmaps[i].Slice(offset, out_length);
out_bitmaps[i] = Bitmap(&out->at(i), out_offset, out_length);
}
int64_t bit_i = 0;
Bitmap::VisitBits(slices, [&](std::bitset<N> bits) {
for (size_t i = 0; i < N; ++i) {
out_bitmaps[i].SetBitTo(bit_i, bits[i]);
}
++bit_i;
});
}
/// assert bitmaps have identical length and return that length
static int64_t BitLength(const Bitmap* bitmaps, size_t N);
template <size_t N>
static int64_t BitLength(const std::array<Bitmap, N>& bitmaps) {
for (size_t i = 1; i < N; ++i) {
assert(bitmaps[i].length() == bitmaps[0].length());
}
return bitmaps[0].length();
}
const uint8_t* data_ = NULLPTR;
uint8_t* mutable_data_ = NULLPTR;
int64_t offset_ = 0, length_ = 0;
};
} // namespace internal
} // namespace arrow
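A minimal usage sketch (not part of the diff) for the Bitmap helpers above: counting positions that are set in two bitmaps via VisitBits, and AND-ing two bitmaps into a third via VisitWordsAndWrite. The include path and the helper names are assumptions made for illustration only.
#include <array>
#include <bitset>
#include <cstdint>
#include "arrow/util/bitmap.h"  // assumed path of the header above

// Count the positions at which both bitmaps are set, one bit at a time.
int64_t CountBothSet(const uint8_t* left, const uint8_t* right, int64_t length) {
  arrow::internal::Bitmap bitmaps[] = {
      arrow::internal::Bitmap(left, /*offset=*/0, length),
      arrow::internal::Bitmap(right, /*offset=*/0, length)};
  int64_t count = 0;
  arrow::internal::Bitmap::VisitBits(bitmaps, [&](std::bitset<2> bits) {
    count += static_cast<int64_t>(bits[0] && bits[1]);
  });
  return count;
}

// AND two equal-length bitmaps into `out`, one word at a time.
void AndBitmaps(const uint8_t* left, const uint8_t* right, uint8_t* out, int64_t length) {
  std::array<arrow::internal::Bitmap, 2> in{
      arrow::internal::Bitmap(left, /*offset=*/0, length),
      arrow::internal::Bitmap(right, /*offset=*/0, length)};
  std::array<arrow::internal::Bitmap, 1> out_bitmaps{
      arrow::internal::Bitmap(out, /*offset=*/0, length)};
  arrow::internal::Bitmap::VisitWordsAndWrite(
      in, &out_bitmaps,
      [](const std::array<uint64_t, 2>& in_words, std::array<uint64_t, 1>* out_words) {
        out_words->at(0) = in_words[0] & in_words[1];
      });
}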

View File

@@ -0,0 +1,43 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
/// \brief Generate a Bitmap with all positions set to `value` except for the one
/// at `straggler_pos`.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> BitmapAllButOne(MemoryPool* pool, int64_t length,
int64_t straggler_pos, bool value = true);
/// \brief Convert a vector of bytes to a bitmap buffer
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> BytesToBits(const std::vector<uint8_t>&,
MemoryPool* pool = default_memory_pool());
} // namespace internal
} // namespace arrow
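A minimal usage sketch (not part of the diff) for BytesToBits above; the include path and the helper name are illustrative assumptions.
#include <cstdint>
#include <memory>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/result.h"
#include "arrow/util/bitmap_builders.h"  // assumed path of the header above

// Pack one-byte-per-value flags into a bitmap buffer allocated from the default pool.
arrow::Result<std::shared_ptr<arrow::Buffer>> PackFlags() {
  std::vector<uint8_t> flags = {1, 0, 1, 1, 0, 0, 1, 0, 1};
  return arrow::internal::BytesToBits(flags);  // uses default_memory_pool()
}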

View File

@@ -0,0 +1,111 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include "arrow/buffer.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
// A std::generate()-like function to write sequential bits into a bitmap area.
// Bits preceding the bitmap area are preserved, bits following the bitmap
// area may be clobbered.
template <class Generator>
void GenerateBits(uint8_t* bitmap, int64_t start_offset, int64_t length, Generator&& g) {
if (length == 0) {
return;
}
uint8_t* cur = bitmap + start_offset / 8;
uint8_t bit_mask = bit_util::kBitmask[start_offset % 8];
uint8_t current_byte = *cur & bit_util::kPrecedingBitmask[start_offset % 8];
for (int64_t index = 0; index < length; ++index) {
const bool bit = g();
current_byte = bit ? (current_byte | bit_mask) : current_byte;
bit_mask = static_cast<uint8_t>(bit_mask << 1);
if (bit_mask == 0) {
bit_mask = 1;
*cur++ = current_byte;
current_byte = 0;
}
}
if (bit_mask != 1) {
*cur++ = current_byte;
}
}
// Like GenerateBits(), but unrolls its main loop for higher performance.
template <class Generator>
void GenerateBitsUnrolled(uint8_t* bitmap, int64_t start_offset, int64_t length,
Generator&& g) {
static_assert(std::is_same<decltype(std::declval<Generator>()()), bool>::value,
"Functor passed to GenerateBitsUnrolled must return bool");
if (length == 0) {
return;
}
uint8_t current_byte;
uint8_t* cur = bitmap + start_offset / 8;
const uint64_t start_bit_offset = start_offset % 8;
uint8_t bit_mask = bit_util::kBitmask[start_bit_offset];
int64_t remaining = length;
if (bit_mask != 0x01) {
current_byte = *cur & bit_util::kPrecedingBitmask[start_bit_offset];
while (bit_mask != 0 && remaining > 0) {
current_byte |= g() * bit_mask;
bit_mask = static_cast<uint8_t>(bit_mask << 1);
--remaining;
}
*cur++ = current_byte;
}
int64_t remaining_bytes = remaining / 8;
uint8_t out_results[8];
while (remaining_bytes-- > 0) {
for (int i = 0; i < 8; ++i) {
out_results[i] = g();
}
*cur++ = (out_results[0] | out_results[1] << 1 | out_results[2] << 2 |
out_results[3] << 3 | out_results[4] << 4 | out_results[5] << 5 |
out_results[6] << 6 | out_results[7] << 7);
}
int64_t remaining_bits = remaining % 8;
if (remaining_bits) {
current_byte = 0;
bit_mask = 0x01;
while (remaining_bits-- > 0) {
current_byte |= g() * bit_mask;
bit_mask = static_cast<uint8_t>(bit_mask << 1);
}
*cur++ = current_byte;
}
}
} // namespace internal
} // namespace arrow
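A minimal usage sketch (not part of the diff) for GenerateBitsUnrolled above; the include path and the helper name are illustrative assumptions.
#include <cstdint>
#include <vector>
#include "arrow/util/bitmap_generate.h"  // assumed path of the header above

// Fill bits [3, 3 + 16) of a small bitmap with an alternating 1/0 pattern,
// leaving the three preceding bits untouched (bits after the range may be clobbered).
void FillAlternating() {
  std::vector<uint8_t> bitmap(4, 0);
  bool next = true;
  arrow::internal::GenerateBitsUnrolled(bitmap.data(), /*start_offset=*/3,
                                        /*length=*/16, [&]() -> bool {
                                          bool bit = next;
                                          next = !next;
                                          return bit;
                                        });
}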

View File

@@ -0,0 +1,244 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include "arrow/result.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Buffer;
class MemoryPool;
namespace internal {
// ----------------------------------------------------------------------
// Bitmap utilities
/// Copy a bit range of an existing bitmap
///
/// \param[in] pool memory pool to allocate memory from
/// \param[in] bitmap source data
/// \param[in] offset bit offset into the source data
/// \param[in] length number of bits to copy
///
/// \return a new buffer containing the copied bits
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> CopyBitmap(MemoryPool* pool, const uint8_t* bitmap,
int64_t offset, int64_t length);
/// Copy a bit range of an existing bitmap into an existing bitmap
///
/// \param[in] bitmap source data
/// \param[in] offset bit offset into the source data
/// \param[in] length number of bits to copy
/// \param[in] dest_offset bit offset into the destination
/// \param[out] dest the destination buffer, must have at least space for
/// (dest_offset + length) bits
ARROW_EXPORT
void CopyBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
int64_t dest_offset);
/// Invert a bit range of an existing bitmap into an existing bitmap
///
/// \param[in] bitmap source data
/// \param[in] offset bit offset into the source data
/// \param[in] length number of bits to copy
/// \param[in] dest_offset bit offset into the destination
/// \param[out] dest the destination buffer, must have at least space for
/// (dest_offset + length) bits
ARROW_EXPORT
void InvertBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
int64_t dest_offset);
/// Invert a bit range of an existing bitmap
///
/// \param[in] pool memory pool to allocate memory from
/// \param[in] bitmap source data
/// \param[in] offset bit offset into the source data
/// \param[in] length number of bits to copy
///
/// \return a new buffer containing the inverted bits
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> InvertBitmap(MemoryPool* pool, const uint8_t* bitmap,
int64_t offset, int64_t length);
/// Reverse a bit range of an existing bitmap into an existing bitmap
///
/// \param[in] bitmap source data
/// \param[in] offset bit offset into the source data
/// \param[in] length number of bits to reverse
/// \param[in] dest_offset bit offset into the destination
/// \param[out] dest the destination buffer, must have at least space for
/// (dest_offset + length) bits
ARROW_EXPORT
void ReverseBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
int64_t dest_offset);
/// Reverse a bit range of an existing bitmap
///
/// \param[in] pool memory pool to allocate memory from
/// \param[in] bitmap source data
/// \param[in] offset bit offset into the source data
/// \param[in] length number of bits to reverse
///
/// \return a new buffer containing the reversed bits
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> ReverseBitmap(MemoryPool* pool, const uint8_t* bitmap,
int64_t offset, int64_t length);
/// Compute the number of 1's in the given data array
///
/// \param[in] data a packed LSB-ordered bitmap as a byte array
/// \param[in] bit_offset a bitwise offset into the bitmap
/// \param[in] length the number of bits to inspect in the bitmap relative to
/// the offset
///
/// \return The number of set (1) bits in the range
ARROW_EXPORT
int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length);
/// Compute the number of 1's in the result of an "and" (&) of two bitmaps
///
/// \param[in] left_bitmap a packed LSB-ordered bitmap as a byte array
/// \param[in] left_offset a bitwise offset into the left bitmap
/// \param[in] right_bitmap a packed LSB-ordered bitmap as a byte array
/// \param[in] right_offset a bitwise offset into the right bitmap
/// \param[in] length the length of the bitmaps (must be the same)
///
/// \return The number of set (1) bits in the "and" of the two bitmaps
ARROW_EXPORT
int64_t CountAndSetBits(const uint8_t* left_bitmap, int64_t left_offset,
const uint8_t* right_bitmap, int64_t right_offset,
int64_t length);
ARROW_EXPORT
bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length);
// Same as BitmapEquals, but considers a NULL bitmap pointer the same as an
// all-ones bitmap.
ARROW_EXPORT
bool OptionalBitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length);
ARROW_EXPORT
bool OptionalBitmapEquals(const std::shared_ptr<Buffer>& left, int64_t left_offset,
const std::shared_ptr<Buffer>& right, int64_t right_offset,
int64_t length);
/// \brief Do a "bitmap and" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out_buffer starting at the given bit-offset.
///
/// out_buffer will be allocated and initialized to zeros using pool before
/// the operation.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> BitmapAnd(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset);
/// \brief Do a "bitmap and" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out starting at the given bit-offset.
ARROW_EXPORT
void BitmapAnd(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
/// \brief Do a "bitmap or" for the given bit length on right and left buffers
/// starting at their respective bit-offsets and put the results in out_buffer
/// starting at the given bit-offset.
///
/// out_buffer will be allocated and initialized to zeros using pool before
/// the operation.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> BitmapOr(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset);
/// \brief Do a "bitmap or" for the given bit length on right and left buffers
/// starting at their respective bit-offsets and put the results in out
/// starting at the given bit-offset.
ARROW_EXPORT
void BitmapOr(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
/// \brief Do a "bitmap xor" for the given bit-length on right and left
/// buffers starting at their respective bit-offsets and put the results in
/// out_buffer starting at the given bit offset.
///
/// out_buffer will be allocated and initialized to zeros using pool before
/// the operation.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> BitmapXor(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset);
/// \brief Do a "bitmap xor" for the given bit-length on right and left
/// buffers starting at their respective bit-offsets and put the results in
/// out starting at the given bit offset.
ARROW_EXPORT
void BitmapXor(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
/// \brief Do a "bitmap and not" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out_buffer starting at the given bit-offset.
///
/// out_buffer will be allocated and initialized to zeros using pool before
/// the operation.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> BitmapAndNot(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset);
/// \brief Do a "bitmap and not" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out starting at the given bit-offset.
ARROW_EXPORT
void BitmapAndNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
/// \brief Do a "bitmap or not" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out_buffer starting at the given bit-offset.
///
/// out_buffer will be allocated and initialized to zeros using pool before
/// the operation.
ARROW_EXPORT
Result<std::shared_ptr<Buffer>> BitmapOrNot(MemoryPool* pool, const uint8_t* left,
int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length,
int64_t out_offset);
/// \brief Do a "bitmap or not" on right and left buffers starting at
/// their respective bit-offsets for the given bit-length and put
/// the results in out starting at the given bit-offset.
ARROW_EXPORT
void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right,
int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out);
} // namespace internal
} // namespace arrow
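A minimal usage sketch (not part of the diff) composing two of the bitmap operations above; the include path and the helper name are illustrative assumptions.
#include <cstdint>
#include <memory>
#include "arrow/buffer.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/util/bitmap_ops.h"  // assumed path of the header above

// AND two validity bitmaps into a freshly allocated buffer, then count the
// surviving set bits. Offsets are per-bitmap bit offsets into the raw bytes.
arrow::Result<int64_t> CountAndOfBitmaps(const uint8_t* left, const uint8_t* right,
                                         int64_t length) {
  ARROW_ASSIGN_OR_RAISE(
      std::shared_ptr<arrow::Buffer> out,
      arrow::internal::BitmapAnd(arrow::default_memory_pool(), left, /*left_offset=*/0,
                                 right, /*right_offset=*/0, length, /*out_offset=*/0));
  return arrow::internal::CountSetBits(out->data(), /*bit_offset=*/0, length);
}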

View File

@@ -0,0 +1,273 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cassert>
#include <cstdint>
#include <cstring>
#include "arrow/buffer.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace internal {
class BitmapReader {
public:
BitmapReader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(bitmap), position_(0), length_(length) {
current_byte_ = 0;
byte_offset_ = start_offset / 8;
bit_offset_ = start_offset % 8;
if (length > 0) {
current_byte_ = bitmap[byte_offset_];
}
}
bool IsSet() const { return (current_byte_ & (1 << bit_offset_)) != 0; }
bool IsNotSet() const { return (current_byte_ & (1 << bit_offset_)) == 0; }
void Next() {
++bit_offset_;
++position_;
if (ARROW_PREDICT_FALSE(bit_offset_ == 8)) {
bit_offset_ = 0;
++byte_offset_;
if (ARROW_PREDICT_TRUE(position_ < length_)) {
current_byte_ = bitmap_[byte_offset_];
}
}
}
int64_t position() const { return position_; }
int64_t length() const { return length_; }
private:
const uint8_t* bitmap_;
int64_t position_;
int64_t length_;
uint8_t current_byte_;
int64_t byte_offset_;
int64_t bit_offset_;
};
// XXX Cannot name it BitmapWordReader because the name is already used
// in bitmap_ops.cc
class BitmapUInt64Reader {
public:
BitmapUInt64Reader(const uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(util::MakeNonNull(bitmap) + start_offset / 8),
num_carry_bits_(8 - start_offset % 8),
length_(length),
remaining_length_(length_),
carry_bits_(0) {
if (length_ > 0) {
// Load carry bits from the first byte's MSBs
if (length_ >= num_carry_bits_) {
carry_bits_ =
LoadPartialWord(static_cast<int8_t>(8 - num_carry_bits_), num_carry_bits_);
} else {
carry_bits_ = LoadPartialWord(static_cast<int8_t>(8 - num_carry_bits_), length_);
}
}
}
uint64_t NextWord() {
if (ARROW_PREDICT_TRUE(remaining_length_ >= 64 + num_carry_bits_)) {
// We can load a full word
uint64_t next_word = LoadFullWord();
// Carry bits come first, then the (64 - num_carry_bits_) LSBs from next_word
uint64_t word = carry_bits_ | (next_word << num_carry_bits_);
carry_bits_ = next_word >> (64 - num_carry_bits_);
remaining_length_ -= 64;
return word;
} else if (remaining_length_ > num_carry_bits_) {
// We can load a partial word
uint64_t next_word =
LoadPartialWord(/*bit_offset=*/0, remaining_length_ - num_carry_bits_);
uint64_t word = carry_bits_ | (next_word << num_carry_bits_);
carry_bits_ = next_word >> (64 - num_carry_bits_);
remaining_length_ = std::max<int64_t>(remaining_length_ - 64, 0);
return word;
} else {
remaining_length_ = 0;
return carry_bits_;
}
}
int64_t position() const { return length_ - remaining_length_; }
int64_t length() const { return length_; }
private:
uint64_t LoadFullWord() {
uint64_t word;
memcpy(&word, bitmap_, 8);
bitmap_ += 8;
return bit_util::ToLittleEndian(word);
}
uint64_t LoadPartialWord(int8_t bit_offset, int64_t num_bits) {
uint64_t word = 0;
const int64_t num_bytes = bit_util::BytesForBits(num_bits);
memcpy(&word, bitmap_, num_bytes);
bitmap_ += num_bytes;
return (bit_util::ToLittleEndian(word) >> bit_offset) &
bit_util::LeastSignificantBitMask(num_bits);
}
const uint8_t* bitmap_;
const int64_t num_carry_bits_; // in [1, 8]
const int64_t length_;
int64_t remaining_length_;
uint64_t carry_bits_;
};
// BitmapWordReader here is faster than BitmapUInt64Reader (in bitmap_reader.h)
// on sufficiently large inputs. However, it has a larger prolog / epilog overhead
// and should probably not be used for small bitmaps.
template <typename Word, bool may_have_byte_offset = true>
class BitmapWordReader {
public:
BitmapWordReader() = default;
BitmapWordReader(const uint8_t* bitmap, int64_t offset, int64_t length)
: offset_(static_cast<int64_t>(may_have_byte_offset) * (offset % 8)),
bitmap_(bitmap + offset / 8),
bitmap_end_(bitmap_ + bit_util::BytesForBits(offset_ + length)) {
// decrement word count by one as we may touch two adjacent words in one iteration
nwords_ = length / (sizeof(Word) * 8) - 1;
if (nwords_ < 0) {
nwords_ = 0;
}
trailing_bits_ = static_cast<int>(length - nwords_ * sizeof(Word) * 8);
trailing_bytes_ = static_cast<int>(bit_util::BytesForBits(trailing_bits_));
if (nwords_ > 0) {
current_data.word_ = load<Word>(bitmap_);
} else if (length > 0) {
current_data.epi.byte_ = load<uint8_t>(bitmap_);
}
}
Word NextWord() {
bitmap_ += sizeof(Word);
const Word next_word = load<Word>(bitmap_);
Word word = current_data.word_;
if (may_have_byte_offset && offset_) {
// combine two adjacent words into one word
// |<------ next ----->|<---- current ---->|
// +-------------+-----+-------------+-----+
// | --- | A | B | --- |
// +-------------+-----+-------------+-----+
// | | offset
// v v
// +-----+-------------+
// | A | B |
// +-----+-------------+
// |<------ word ----->|
word >>= offset_;
word |= next_word << (sizeof(Word) * 8 - offset_);
}
current_data.word_ = next_word;
return word;
}
uint8_t NextTrailingByte(int& valid_bits) {
uint8_t byte;
assert(trailing_bits_ > 0);
if (trailing_bits_ <= 8) {
// last byte
valid_bits = trailing_bits_;
trailing_bits_ = 0;
byte = 0;
internal::BitmapReader reader(bitmap_, offset_, valid_bits);
for (int i = 0; i < valid_bits; ++i) {
byte >>= 1;
if (reader.IsSet()) {
byte |= 0x80;
}
reader.Next();
}
byte >>= (8 - valid_bits);
} else {
++bitmap_;
const uint8_t next_byte = load<uint8_t>(bitmap_);
byte = current_data.epi.byte_;
if (may_have_byte_offset && offset_) {
byte >>= offset_;
byte |= next_byte << (8 - offset_);
}
current_data.epi.byte_ = next_byte;
trailing_bits_ -= 8;
trailing_bytes_--;
valid_bits = 8;
}
return byte;
}
int64_t words() const { return nwords_; }
int trailing_bytes() const { return trailing_bytes_; }
private:
int64_t offset_;
const uint8_t* bitmap_;
const uint8_t* bitmap_end_;
int64_t nwords_;
int trailing_bits_;
int trailing_bytes_;
union {
Word word_;
struct {
#if ARROW_LITTLE_ENDIAN == 0
uint8_t padding_bytes_[sizeof(Word) - 1];
#endif
uint8_t byte_;
} epi;
} current_data;
template <typename DType>
DType load(const uint8_t* bitmap) {
assert(bitmap + sizeof(DType) <= bitmap_end_);
return bit_util::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
}
};
/// \brief Index into a possibly non-existent bitmap
struct OptionalBitIndexer {
const uint8_t* bitmap;
const int64_t offset;
explicit OptionalBitIndexer(const uint8_t* buffer = NULLPTR, int64_t offset = 0)
: bitmap(buffer), offset(offset) {}
bool operator[](int64_t i) const {
return bitmap == NULLPTR || bit_util::GetBit(bitmap, offset + i);
}
};
} // namespace internal
} // namespace arrow
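A minimal usage sketch (not part of the diff) for the sequential BitmapReader above; the include path and the helper name are illustrative assumptions.
#include <cstdint>
#include "arrow/util/bitmap_reader.h"  // assumed path of the header above

// Sequentially scan a bitmap and count the set bits in [offset, offset + length).
// For large inputs the word-based readers above are faster; this is the simple form.
int64_t CountSetSequentially(const uint8_t* bitmap, int64_t offset, int64_t length) {
  arrow::internal::BitmapReader reader(bitmap, offset, length);
  int64_t count = 0;
  for (int64_t i = 0; i < length; ++i) {
    count += reader.IsSet();
    reader.Next();
  }
  return count;
}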

View File

@@ -0,0 +1,88 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_reader.h"
namespace arrow {
namespace internal {
// A function that visits each bit in a bitmap and calls a visitor function with a
// boolean representation of that bit. This is intended to be analogous to
// GenerateBits.
template <class Visitor>
void VisitBits(const uint8_t* bitmap, int64_t start_offset, int64_t length,
Visitor&& visit) {
BitmapReader reader(bitmap, start_offset, length);
for (int64_t index = 0; index < length; ++index) {
visit(reader.IsSet());
reader.Next();
}
}
// Like VisitBits(), but unrolls its main loop for better performance.
template <class Visitor>
void VisitBitsUnrolled(const uint8_t* bitmap, int64_t start_offset, int64_t length,
Visitor&& visit) {
if (length == 0) {
return;
}
// Start by visiting any bits preceding the first full byte.
int64_t num_bits_before_full_bytes =
bit_util::RoundUpToMultipleOf8(start_offset) - start_offset;
// Truncate num_bits_before_full_bytes if it is greater than length.
if (num_bits_before_full_bytes > length) {
num_bits_before_full_bytes = length;
}
  // Use the non-loop-unrolled VisitBits since we don't want to add branches
VisitBits<Visitor>(bitmap, start_offset, num_bits_before_full_bytes, visit);
// Shift the start pointer to the first full byte and compute the
// number of full bytes to be read.
const uint8_t* first_full_byte = bitmap + bit_util::CeilDiv(start_offset, 8);
const int64_t num_full_bytes = (length - num_bits_before_full_bytes) / 8;
// Iterate over each full byte of the input bitmap and call the visitor in
// a loop-unrolled manner.
for (int64_t byte_index = 0; byte_index < num_full_bytes; ++byte_index) {
// Get the current bit-packed byte value from the bitmap.
const uint8_t byte = *(first_full_byte + byte_index);
// Execute the visitor function on each bit of the current byte.
visit(bit_util::GetBitFromByte(byte, 0));
visit(bit_util::GetBitFromByte(byte, 1));
visit(bit_util::GetBitFromByte(byte, 2));
visit(bit_util::GetBitFromByte(byte, 3));
visit(bit_util::GetBitFromByte(byte, 4));
visit(bit_util::GetBitFromByte(byte, 5));
visit(bit_util::GetBitFromByte(byte, 6));
visit(bit_util::GetBitFromByte(byte, 7));
}
  // Visit any leftover bits in the last byte.
const int64_t num_bits_after_full_bytes = (length - num_bits_before_full_bytes) % 8;
VisitBits<Visitor>(first_full_byte + num_full_bytes, 0, num_bits_after_full_bytes,
visit);
}
} // namespace internal
} // namespace arrow
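A minimal usage sketch (not part of the diff) for VisitBitsUnrolled above; the include path and the helper name are illustrative assumptions.
#include <cstdint>
#include <vector>
#include "arrow/util/bitmap_visit.h"  // assumed path of the header above

// Expand a packed validity bitmap into one bool per value using the unrolled visitor.
std::vector<bool> UnpackValidity(const uint8_t* bitmap, int64_t offset, int64_t length) {
  std::vector<bool> valid;
  valid.reserve(static_cast<size_t>(length));
  arrow::internal::VisitBitsUnrolled(bitmap, offset, length,
                                     [&](bool is_set) { valid.push_back(is_set); });
  return valid;
}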

View File

@@ -0,0 +1,286 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstring>
#include "arrow/util/bit_util.h"
#include "arrow/util/endian.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace internal {
class BitmapWriter {
// A sequential bitwise writer that preserves surrounding bit values.
public:
BitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(bitmap), position_(0), length_(length) {
byte_offset_ = start_offset / 8;
bit_mask_ = bit_util::kBitmask[start_offset % 8];
if (length > 0) {
current_byte_ = bitmap[byte_offset_];
} else {
current_byte_ = 0;
}
}
void Set() { current_byte_ |= bit_mask_; }
void Clear() { current_byte_ &= bit_mask_ ^ 0xFF; }
void Next() {
bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
++position_;
if (bit_mask_ == 0) {
      // Finished this byte, advance to the next one
bit_mask_ = 0x01;
bitmap_[byte_offset_++] = current_byte_;
if (ARROW_PREDICT_TRUE(position_ < length_)) {
current_byte_ = bitmap_[byte_offset_];
}
}
}
void Finish() {
    // Store the current byte if we didn't go past the bitmap storage
if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
bitmap_[byte_offset_] = current_byte_;
}
}
int64_t position() const { return position_; }
private:
uint8_t* bitmap_;
int64_t position_;
int64_t length_;
uint8_t current_byte_;
uint8_t bit_mask_;
int64_t byte_offset_;
};
class FirstTimeBitmapWriter {
// Like BitmapWriter, but any bit values *following* the bits written
// might be clobbered. It is hence faster than BitmapWriter, and can
// also avoid false positives with Valgrind.
public:
FirstTimeBitmapWriter(uint8_t* bitmap, int64_t start_offset, int64_t length)
: bitmap_(bitmap), position_(0), length_(length) {
current_byte_ = 0;
byte_offset_ = start_offset / 8;
bit_mask_ = bit_util::kBitmask[start_offset % 8];
if (length > 0) {
current_byte_ =
bitmap[byte_offset_] & bit_util::kPrecedingBitmask[start_offset % 8];
} else {
current_byte_ = 0;
}
}
  /// Appends number_of_bits from word to the bitmap, starting at the current write
  /// position.
///
/// \param[in] word The LSB bitmap to append. Any bits past number_of_bits are assumed
/// to be unset (i.e. 0).
/// \param[in] number_of_bits The number of bits to append from word.
void AppendWord(uint64_t word, int64_t number_of_bits) {
if (ARROW_PREDICT_FALSE(number_of_bits == 0)) {
return;
}
// Location that the first byte needs to be written to.
uint8_t* append_position = bitmap_ + byte_offset_;
// Update state variables except for current_byte_ here.
position_ += number_of_bits;
int64_t bit_offset = bit_util::CountTrailingZeros(static_cast<uint32_t>(bit_mask_));
bit_mask_ = bit_util::kBitmask[(bit_offset + number_of_bits) % 8];
byte_offset_ += (bit_offset + number_of_bits) / 8;
if (bit_offset != 0) {
// We are in the middle of the byte. This code updates the byte and shifts
// bits appropriately within word so it can be memcpy'd below.
int64_t bits_to_carry = 8 - bit_offset;
      // Carry over bits from word to current_byte_. We assume any extra bits in word are
      // unset, so no additional accounting is needed for when number_of_bits <
      // bits_to_carry.
current_byte_ |= (word & bit_util::kPrecedingBitmask[bits_to_carry]) << bit_offset;
      // Check if everything has been transferred into current_byte_.
if (ARROW_PREDICT_FALSE(number_of_bits < bits_to_carry)) {
return;
}
*append_position = current_byte_;
append_position++;
// Move the carry bits off of word.
word = word >> bits_to_carry;
number_of_bits -= bits_to_carry;
}
word = bit_util::ToLittleEndian(word);
int64_t bytes_for_word = ::arrow::bit_util::BytesForBits(number_of_bits);
std::memcpy(append_position, &word, bytes_for_word);
// At this point, the previous current_byte_ has been written to bitmap_.
// The new current_byte_ is either the last relevant byte in 'word'
// or cleared if the new position is byte aligned (i.e. a fresh byte).
if (bit_mask_ == 0x1) {
current_byte_ = 0;
} else {
current_byte_ = *(append_position + bytes_for_word - 1);
}
}
void Set() { current_byte_ |= bit_mask_; }
void Clear() {}
void Next() {
bit_mask_ = static_cast<uint8_t>(bit_mask_ << 1);
++position_;
if (bit_mask_ == 0) {
      // Finished this byte, advance to the next one
bit_mask_ = 0x01;
bitmap_[byte_offset_++] = current_byte_;
current_byte_ = 0;
}
}
void Finish() {
    // Store the current byte if we didn't go past the bitmap storage
if (length_ > 0 && (bit_mask_ != 0x01 || position_ < length_)) {
bitmap_[byte_offset_] = current_byte_;
}
}
int64_t position() const { return position_; }
private:
uint8_t* bitmap_;
int64_t position_;
int64_t length_;
uint8_t current_byte_;
uint8_t bit_mask_;
int64_t byte_offset_;
};
template <typename Word, bool may_have_byte_offset = true>
class BitmapWordWriter {
public:
BitmapWordWriter() = default;
BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length)
: offset_(static_cast<int64_t>(may_have_byte_offset) * (offset % 8)),
bitmap_(bitmap + offset / 8),
bitmap_end_(bitmap_ + bit_util::BytesForBits(offset_ + length)),
mask_((1U << offset_) - 1) {
if (offset_) {
if (length >= static_cast<int>(sizeof(Word) * 8)) {
current_data.word_ = load<Word>(bitmap_);
} else if (length > 0) {
current_data.epi.byte_ = load<uint8_t>(bitmap_);
}
}
}
void PutNextWord(Word word) {
if (may_have_byte_offset && offset_) {
// split one word into two adjacent words, don't touch unused bits
// |<------ word ----->|
// +-----+-------------+
// | A | B |
// +-----+-------------+
// | |
// v v offset
// +-------------+-----+-------------+-----+
// | --- | A | B | --- |
// +-------------+-----+-------------+-----+
// |<------ next ----->|<---- current ---->|
word = (word << offset_) | (word >> (sizeof(Word) * 8 - offset_));
Word next_word = load<Word>(bitmap_ + sizeof(Word));
current_data.word_ = (current_data.word_ & mask_) | (word & ~mask_);
next_word = (next_word & ~mask_) | (word & mask_);
store<Word>(bitmap_, current_data.word_);
store<Word>(bitmap_ + sizeof(Word), next_word);
current_data.word_ = next_word;
} else {
store<Word>(bitmap_, word);
}
bitmap_ += sizeof(Word);
}
void PutNextTrailingByte(uint8_t byte, int valid_bits) {
if (valid_bits == 8) {
if (may_have_byte_offset && offset_) {
byte = (byte << offset_) | (byte >> (8 - offset_));
uint8_t next_byte = load<uint8_t>(bitmap_ + 1);
current_data.epi.byte_ = (current_data.epi.byte_ & mask_) | (byte & ~mask_);
next_byte = (next_byte & ~mask_) | (byte & mask_);
store<uint8_t>(bitmap_, current_data.epi.byte_);
store<uint8_t>(bitmap_ + 1, next_byte);
current_data.epi.byte_ = next_byte;
} else {
store<uint8_t>(bitmap_, byte);
}
++bitmap_;
} else {
assert(valid_bits > 0);
assert(valid_bits < 8);
assert(bitmap_ + bit_util::BytesForBits(offset_ + valid_bits) <= bitmap_end_);
internal::BitmapWriter writer(bitmap_, offset_, valid_bits);
for (int i = 0; i < valid_bits; ++i) {
(byte & 0x01) ? writer.Set() : writer.Clear();
writer.Next();
byte >>= 1;
}
writer.Finish();
}
}
private:
int64_t offset_;
uint8_t* bitmap_;
const uint8_t* bitmap_end_;
uint64_t mask_;
union {
Word word_;
struct {
#if ARROW_LITTLE_ENDIAN == 0
uint8_t padding_bytes_[sizeof(Word) - 1];
#endif
uint8_t byte_;
} epi;
} current_data;
template <typename DType>
DType load(const uint8_t* bitmap) {
assert(bitmap + sizeof(DType) <= bitmap_end_);
return bit_util::ToLittleEndian(util::SafeLoadAs<DType>(bitmap));
}
template <typename DType>
void store(uint8_t* bitmap, DType data) {
assert(bitmap + sizeof(DType) <= bitmap_end_);
util::SafeStore(bitmap, bit_util::FromLittleEndian(data));
}
};
} // namespace internal
} // namespace arrow
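A minimal usage sketch (not part of the diff) for FirstTimeBitmapWriter above; the include path and the helper name are illustrative assumptions.
#include <cstdint>
#include <vector>
#include "arrow/util/bitmap_writer.h"  // assumed path of the header above

// Write 20 bits starting at bit 5 of a fresh bitmap. Bits after the written range
// may be clobbered, which is why FirstTimeBitmapWriter is only safe on new buffers.
void WriteEveryThirdBit() {
  std::vector<uint8_t> bitmap(4, 0);
  arrow::internal::FirstTimeBitmapWriter writer(bitmap.data(), /*start_offset=*/5,
                                                /*length=*/20);
  for (int64_t i = 0; i < 20; ++i) {
    if (i % 3 == 0) {
      writer.Set();
    } else {
      writer.Clear();
    }
    writer.Next();
  }
  writer.Finish();
}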

View File

@@ -0,0 +1,89 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
#include "arrow/buffer.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "arrow/type_fwd.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/compare.h"
#include "arrow/util/functional.h"
#include "arrow/util/macros.h"
#include "arrow/util/string_builder.h"
#include "arrow/util/type_traits.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
/// \brief Store a stack of bitsets efficiently. The top bitset may be
/// accessed and its bits may be modified, but it may not be resized.
class BitsetStack {
public:
using reference = typename std::vector<bool>::reference;
/// \brief push a bitset onto the stack
/// \param size number of bits in the next bitset
/// \param value initial value for bits in the pushed bitset
void Push(int size, bool value) {
offsets_.push_back(bit_count());
bits_.resize(bit_count() + size, value);
}
/// \brief number of bits in the bitset at the top of the stack
int TopSize() const {
if (offsets_.size() == 0) return 0;
return bit_count() - offsets_.back();
}
/// \brief pop a bitset off the stack
void Pop() {
bits_.resize(offsets_.back());
offsets_.pop_back();
}
/// \brief get the value of a bit in the top bitset
/// \param i index of the bit to access
bool operator[](int i) const { return bits_[offsets_.back() + i]; }
/// \brief get a mutable reference to a bit in the top bitset
/// \param i index of the bit to access
reference operator[](int i) { return bits_[offsets_.back() + i]; }
private:
int bit_count() const { return static_cast<int>(bits_.size()); }
std::vector<bool> bits_;
std::vector<int> offsets_;
};
} // namespace internal
} // namespace arrow
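A minimal usage sketch (not part of the diff) for BitsetStack above; the include path, the helper name, and the nested-schema scenario are illustrative assumptions.
#include "arrow/util/bitset_stack.h"  // assumed path of the header above

// Track a small set of boolean flags per nesting level, e.g. while walking a
// nested structure: push a bitset when entering a level, pop when leaving it.
void TrackNestedFlags() {
  arrow::internal::BitsetStack stack;
  stack.Push(/*size=*/3, /*value=*/false);  // enter a level with three flags
  stack[1] = true;                          // flip one flag in the top bitset
  bool flag = stack[1];                     // read it back
  (void)flag;
  stack.Pop();                              // leave the level
}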

View File

@@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/util/endian.h"
#include "arrow/util/visibility.h"
#include <stdint.h>
namespace arrow {
namespace internal {
ARROW_EXPORT
int unpack32(const uint32_t* in, uint32_t* out, int batch_size, int num_bits);
ARROW_EXPORT
int unpack64(const uint8_t* in, uint64_t* out, int batch_size, int num_bits);
} // namespace internal
} // namespace arrow
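A minimal usage sketch (not part of the diff) for unpack32 above. The include path and the helper name are illustrative assumptions, and so is the parameter interpretation: this sketch assumes `batch_size` counts output values and the return value is the number of values actually unpacked.
#include <cstdint>
#include <vector>
#include "arrow/util/bpacking.h"  // assumed path of the header above

// Unpack 32 values that were bit-packed at 3 bits each (32 * 3 = 96 bits,
// i.e. three 32-bit input words must be readable from `packed`).
std::vector<uint32_t> Unpack3BitValues(const uint32_t* packed) {
  std::vector<uint32_t> out(32);
  int unpacked = arrow::internal::unpack32(packed, out.data(), /*batch_size=*/32,
                                           /*num_bits=*/3);
  out.resize(static_cast<size_t>(unpacked));
  return out;
}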

File diff suppressed because it is too large

View File

@@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stdint.h>
namespace arrow {
namespace internal {
int unpack32_avx2(const uint32_t* in, uint32_t* out, int batch_size, int num_bits);
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stdint.h>
namespace arrow {
namespace internal {
int unpack32_avx512(const uint32_t* in, uint32_t* out, int batch_size, int num_bits);
} // namespace internal
} // namespace arrow

File diff suppressed because it is too large

View File

@@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stdint.h>
namespace arrow {
namespace internal {
int unpack32_neon(const uint32_t* in, uint32_t* out, int batch_size, int num_bits);
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,836 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Automatically generated file; DO NOT EDIT.
#pragma once
#include <cstdint>
#include <cstring>
#include <xsimd/xsimd.hpp>
#include "arrow/util/dispatch.h"
#include "arrow/util/ubsan.h"
namespace arrow {
namespace internal {
namespace {
using ::arrow::util::SafeLoad;
template <DispatchLevel level>
struct UnpackBits512 {
using simd_batch = xsimd::make_sized_batch_t<uint32_t, 16>;
inline static const uint32_t* unpack0_32(const uint32_t* in, uint32_t* out) {
memset(out, 0x0, 32 * sizeof(*out));
out += 32;
return in;
}
inline static const uint32_t* unpack1_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x1;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 1-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
shifts = simd_batch{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 1-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
shifts = simd_batch{ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 1;
return in;
}
inline static const uint32_t* unpack2_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x3;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 2-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) };
shifts = simd_batch{ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 2-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
shifts = simd_batch{ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 2;
return in;
}
inline static const uint32_t* unpack3_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x7;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 3-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
shifts = simd_batch{ 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 0, 1, 4, 7, 10, 13 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 3-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
shifts = simd_batch{ 16, 19, 22, 25, 28, 0, 2, 5, 8, 11, 14, 17, 20, 23, 26, 29 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 3;
return in;
}
inline static const uint32_t* unpack4_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0xf;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 4-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) };
shifts = simd_batch{ 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 4-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
shifts = simd_batch{ 0, 4, 8, 12, 16, 20, 24, 28, 0, 4, 8, 12, 16, 20, 24, 28 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 4;
return in;
}
inline static const uint32_t* unpack5_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x1f;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 5-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
shifts = simd_batch{ 0, 5, 10, 15, 20, 25, 0, 3, 8, 13, 18, 23, 0, 1, 6, 11 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 5-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 31 | SafeLoad<uint32_t>(in + 3) << 1, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 29 | SafeLoad<uint32_t>(in + 4) << 3, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
shifts = simd_batch{ 16, 21, 26, 0, 4, 9, 14, 19, 24, 0, 2, 7, 12, 17, 22, 27 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 5;
return in;
}
inline static const uint32_t* unpack6_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x3f;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 6-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) };
shifts = simd_batch{ 0, 6, 12, 18, 24, 0, 4, 10, 16, 22, 0, 2, 8, 14, 20, 26 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 6-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
shifts = simd_batch{ 0, 6, 12, 18, 24, 0, 4, 10, 16, 22, 0, 2, 8, 14, 20, 26 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 6;
return in;
}
inline static const uint32_t* unpack7_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x7f;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 7-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 27 | SafeLoad<uint32_t>(in + 3) << 5, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
shifts = simd_batch{ 0, 7, 14, 21, 0, 3, 10, 17, 24, 0, 6, 13, 20, 0, 2, 9 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 7-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
shifts = simd_batch{ 16, 23, 0, 5, 12, 19, 0, 1, 8, 15, 22, 0, 4, 11, 18, 25 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 7;
return in;
}
inline static const uint32_t* unpack8_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0xff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 8-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) };
shifts = simd_batch{ 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 8-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) };
shifts = simd_batch{ 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24, 0, 8, 16, 24 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 8;
return in;
}
inline static const uint32_t* unpack9_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x1ff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 9-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 27 | SafeLoad<uint32_t>(in + 1) << 5, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4) };
shifts = simd_batch{ 0, 9, 18, 0, 4, 13, 22, 0, 8, 17, 0, 3, 12, 21, 0, 7 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 9-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 25 | SafeLoad<uint32_t>(in + 5) << 7, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) };
shifts = simd_batch{ 16, 0, 2, 11, 20, 0, 6, 15, 0, 1, 10, 19, 0, 5, 14, 23 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 9;
return in;
}
inline static const uint32_t* unpack10_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x3ff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 10-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) };
shifts = simd_batch{ 0, 10, 20, 0, 8, 18, 0, 6, 16, 0, 4, 14, 0, 2, 12, 22 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 10-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 30 | SafeLoad<uint32_t>(in + 6) << 2, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 26 | SafeLoad<uint32_t>(in + 8) << 6, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) };
shifts = simd_batch{ 0, 10, 20, 0, 8, 18, 0, 6, 16, 0, 4, 14, 0, 2, 12, 22 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 10;
return in;
}
inline static const uint32_t* unpack11_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x7ff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 11-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 22 | SafeLoad<uint32_t>(in + 1) << 10, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 23 | SafeLoad<uint32_t>(in + 2) << 9, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 24 | SafeLoad<uint32_t>(in + 3) << 8, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 25 | SafeLoad<uint32_t>(in + 4) << 7, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5) };
shifts = simd_batch{ 0, 11, 0, 1, 12, 0, 2, 13, 0, 3, 14, 0, 4, 15, 0, 5 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 11-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 27 | SafeLoad<uint32_t>(in + 6) << 5, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 29 | SafeLoad<uint32_t>(in + 8) << 3, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 31 | SafeLoad<uint32_t>(in + 10) << 1, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) };
shifts = simd_batch{ 16, 0, 6, 17, 0, 7, 18, 0, 8, 19, 0, 9, 20, 0, 10, 21 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 11;
return in;
}
inline static const uint32_t* unpack12_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0xfff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 12-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 24 | SafeLoad<uint32_t>(in + 1) << 8, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) };
shifts = simd_batch{ 0, 12, 0, 4, 16, 0, 8, 20, 0, 12, 0, 4, 16, 0, 8, 20 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 12-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 28 | SafeLoad<uint32_t>(in + 11) << 4, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) };
shifts = simd_batch{ 0, 12, 0, 4, 16, 0, 8, 20, 0, 12, 0, 4, 16, 0, 8, 20 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 12;
return in;
}
inline static const uint32_t* unpack13_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x1fff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 13-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 26 | SafeLoad<uint32_t>(in + 1) << 6, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 20 | SafeLoad<uint32_t>(in + 2) << 12, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 27 | SafeLoad<uint32_t>(in + 3) << 5, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 21 | SafeLoad<uint32_t>(in + 4) << 11, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10, SafeLoad<uint32_t>(in + 6) };
shifts = simd_batch{ 0, 13, 0, 7, 0, 1, 14, 0, 8, 0, 2, 15, 0, 9, 0, 3 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 13-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 29 | SafeLoad<uint32_t>(in + 7) << 3, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 23 | SafeLoad<uint32_t>(in + 8) << 9, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 31 | SafeLoad<uint32_t>(in + 11) << 1, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 25 | SafeLoad<uint32_t>(in + 12) << 7, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) };
shifts = simd_batch{ 16, 0, 10, 0, 4, 17, 0, 11, 0, 5, 18, 0, 12, 0, 6, 19 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 13;
return in;
}
inline static const uint32_t* unpack14_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x3fff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 14-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 24 | SafeLoad<uint32_t>(in + 2) << 8, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) };
shifts = simd_batch{ 0, 14, 0, 10, 0, 6, 0, 2, 16, 0, 12, 0, 8, 0, 4, 18 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 14-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 30 | SafeLoad<uint32_t>(in + 11) << 2, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 22 | SafeLoad<uint32_t>(in + 13) << 10, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) };
shifts = simd_batch{ 0, 14, 0, 10, 0, 6, 0, 2, 16, 0, 12, 0, 8, 0, 4, 18 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 14;
return in;
}
inline static const uint32_t* unpack15_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x7fff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 15-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14, SafeLoad<uint32_t>(in + 7) };
shifts = simd_batch{ 0, 15, 0, 13, 0, 11, 0, 9, 0, 7, 0, 5, 0, 3, 0, 1 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 15-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 31 | SafeLoad<uint32_t>(in + 8) << 1, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 29 | SafeLoad<uint32_t>(in + 9) << 3, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 27 | SafeLoad<uint32_t>(in + 10) << 5, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 25 | SafeLoad<uint32_t>(in + 11) << 7, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 23 | SafeLoad<uint32_t>(in + 12) << 9, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 21 | SafeLoad<uint32_t>(in + 13) << 11, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 19 | SafeLoad<uint32_t>(in + 14) << 13, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) };
shifts = simd_batch{ 16, 0, 14, 0, 12, 0, 10, 0, 8, 0, 6, 0, 4, 0, 2, 17 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 15;
return in;
}
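// Note (added comment, not in the generated source): for bit widths that
// divide 32 evenly (1, 2, 4, 8, 16, 32), no value straddles a 32-bit word
// boundary, so the loads below need no cross-word ">> hi | << lo" stitching.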
inline static const uint32_t* unpack16_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0xffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 16-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) };
shifts = simd_batch{ 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 16-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) };
shifts = simd_batch{ 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 16;
return in;
}
inline static const uint32_t* unpack17_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x1ffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 17-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 17 | SafeLoad<uint32_t>(in + 1) << 15, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 19 | SafeLoad<uint32_t>(in + 2) << 13, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 21 | SafeLoad<uint32_t>(in + 3) << 11, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 23 | SafeLoad<uint32_t>(in + 4) << 9, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 25 | SafeLoad<uint32_t>(in + 5) << 7, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 27 | SafeLoad<uint32_t>(in + 6) << 5, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 29 | SafeLoad<uint32_t>(in + 7) << 3, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 31 | SafeLoad<uint32_t>(in + 8) << 1 };
shifts = simd_batch{ 0, 0, 2, 0, 4, 0, 6, 0, 8, 0, 10, 0, 12, 0, 14, 0 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 17-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 8) >> 16 | SafeLoad<uint32_t>(in + 9) << 16, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 18 | SafeLoad<uint32_t>(in + 10) << 14, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 20 | SafeLoad<uint32_t>(in + 11) << 12, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 22 | SafeLoad<uint32_t>(in + 12) << 10, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 24 | SafeLoad<uint32_t>(in + 13) << 8, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 26 | SafeLoad<uint32_t>(in + 14) << 6, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 28 | SafeLoad<uint32_t>(in + 15) << 4, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 30 | SafeLoad<uint32_t>(in + 16) << 2, SafeLoad<uint32_t>(in + 16) };
shifts = simd_batch{ 0, 1, 0, 3, 0, 5, 0, 7, 0, 9, 0, 11, 0, 13, 0, 15 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 17;
return in;
}
inline static const uint32_t* unpack18_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x3ffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 18-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 18 | SafeLoad<uint32_t>(in + 1) << 14, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 22 | SafeLoad<uint32_t>(in + 2) << 10, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4) >> 16 | SafeLoad<uint32_t>(in + 5) << 16, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) };
shifts = simd_batch{ 0, 0, 4, 0, 8, 0, 12, 0, 0, 2, 0, 6, 0, 10, 0, 14 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 18-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 18 | SafeLoad<uint32_t>(in + 10) << 14, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 22 | SafeLoad<uint32_t>(in + 11) << 10, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 30 | SafeLoad<uint32_t>(in + 13) << 2, SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 20 | SafeLoad<uint32_t>(in + 15) << 12, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) };
shifts = simd_batch{ 0, 0, 4, 0, 8, 0, 12, 0, 0, 2, 0, 6, 0, 10, 0, 14 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 18;
return in;
}
inline static const uint32_t* unpack19_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x7ffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 19-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 19 | SafeLoad<uint32_t>(in + 1) << 13, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 25 | SafeLoad<uint32_t>(in + 2) << 7, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 31 | SafeLoad<uint32_t>(in + 3) << 1, SafeLoad<uint32_t>(in + 3) >> 18 | SafeLoad<uint32_t>(in + 4) << 14, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 24 | SafeLoad<uint32_t>(in + 5) << 8, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 30 | SafeLoad<uint32_t>(in + 6) << 2, SafeLoad<uint32_t>(in + 6) >> 17 | SafeLoad<uint32_t>(in + 7) << 15, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 23 | SafeLoad<uint32_t>(in + 8) << 9, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 29 | SafeLoad<uint32_t>(in + 9) << 3 };
shifts = simd_batch{ 0, 0, 6, 0, 12, 0, 0, 5, 0, 11, 0, 0, 4, 0, 10, 0 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 19-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 9) >> 16 | SafeLoad<uint32_t>(in + 10) << 16, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 22 | SafeLoad<uint32_t>(in + 11) << 10, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 28 | SafeLoad<uint32_t>(in + 12) << 4, SafeLoad<uint32_t>(in + 12) >> 15 | SafeLoad<uint32_t>(in + 13) << 17, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 21 | SafeLoad<uint32_t>(in + 14) << 11, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 27 | SafeLoad<uint32_t>(in + 15) << 5, SafeLoad<uint32_t>(in + 15) >> 14 | SafeLoad<uint32_t>(in + 16) << 18, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 20 | SafeLoad<uint32_t>(in + 17) << 12, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 26 | SafeLoad<uint32_t>(in + 18) << 6, SafeLoad<uint32_t>(in + 18) };
shifts = simd_batch{ 0, 3, 0, 9, 0, 0, 2, 0, 8, 0, 0, 1, 0, 7, 0, 13 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 19;
return in;
}
inline static const uint32_t* unpack20_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0xfffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 20-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 20 | SafeLoad<uint32_t>(in + 1) << 12, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2) >> 16 | SafeLoad<uint32_t>(in + 3) << 16, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9) };
shifts = simd_batch{ 0, 0, 8, 0, 0, 4, 0, 12, 0, 0, 8, 0, 0, 4, 0, 12 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 20-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 20 | SafeLoad<uint32_t>(in + 11) << 12, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 28 | SafeLoad<uint32_t>(in + 12) << 4, SafeLoad<uint32_t>(in + 12) >> 16 | SafeLoad<uint32_t>(in + 13) << 16, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 24 | SafeLoad<uint32_t>(in + 14) << 8, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 20 | SafeLoad<uint32_t>(in + 16) << 12, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) >> 16 | SafeLoad<uint32_t>(in + 18) << 16, SafeLoad<uint32_t>(in + 18), SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) };
shifts = simd_batch{ 0, 0, 8, 0, 0, 4, 0, 12, 0, 0, 8, 0, 0, 4, 0, 12 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 20;
return in;
}
inline static const uint32_t* unpack21_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x1fffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 21-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 21 | SafeLoad<uint32_t>(in + 1) << 11, SafeLoad<uint32_t>(in + 1), SafeLoad<uint32_t>(in + 1) >> 31 | SafeLoad<uint32_t>(in + 2) << 1, SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 30 | SafeLoad<uint32_t>(in + 4) << 2, SafeLoad<uint32_t>(in + 4) >> 19 | SafeLoad<uint32_t>(in + 5) << 13, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) >> 17 | SafeLoad<uint32_t>(in + 9) << 15, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 27 | SafeLoad<uint32_t>(in + 10) << 5 };
shifts = simd_batch{ 0, 0, 10, 0, 0, 9, 0, 0, 8, 0, 0, 7, 0, 0, 6, 0 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 21-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12) >> 15 | SafeLoad<uint32_t>(in + 13) << 17, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 25 | SafeLoad<uint32_t>(in + 14) << 7, SafeLoad<uint32_t>(in + 14) >> 14 | SafeLoad<uint32_t>(in + 15) << 18, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 13 | SafeLoad<uint32_t>(in + 17) << 19, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 23 | SafeLoad<uint32_t>(in + 18) << 9, SafeLoad<uint32_t>(in + 18) >> 12 | SafeLoad<uint32_t>(in + 19) << 20, SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 22 | SafeLoad<uint32_t>(in + 20) << 10, SafeLoad<uint32_t>(in + 20) };
shifts = simd_batch{ 0, 5, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 11 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 21;
return in;
}
inline static const uint32_t* unpack22_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x3fffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 22-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 22 | SafeLoad<uint32_t>(in + 1) << 10, SafeLoad<uint32_t>(in + 1) >> 12 | SafeLoad<uint32_t>(in + 2) << 20, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 24 | SafeLoad<uint32_t>(in + 3) << 8, SafeLoad<uint32_t>(in + 3) >> 14 | SafeLoad<uint32_t>(in + 4) << 18, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 26 | SafeLoad<uint32_t>(in + 5) << 6, SafeLoad<uint32_t>(in + 5) >> 16 | SafeLoad<uint32_t>(in + 6) << 16, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 28 | SafeLoad<uint32_t>(in + 7) << 4, SafeLoad<uint32_t>(in + 7) >> 18 | SafeLoad<uint32_t>(in + 8) << 14, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12, SafeLoad<uint32_t>(in + 10) };
shifts = simd_batch{ 0, 0, 0, 2, 0, 0, 4, 0, 0, 6, 0, 0, 8, 0, 0, 10 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 22-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 11), SafeLoad<uint32_t>(in + 11) >> 22 | SafeLoad<uint32_t>(in + 12) << 10, SafeLoad<uint32_t>(in + 12) >> 12 | SafeLoad<uint32_t>(in + 13) << 20, SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 24 | SafeLoad<uint32_t>(in + 14) << 8, SafeLoad<uint32_t>(in + 14) >> 14 | SafeLoad<uint32_t>(in + 15) << 18, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 26 | SafeLoad<uint32_t>(in + 16) << 6, SafeLoad<uint32_t>(in + 16) >> 16 | SafeLoad<uint32_t>(in + 17) << 16, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 28 | SafeLoad<uint32_t>(in + 18) << 4, SafeLoad<uint32_t>(in + 18) >> 18 | SafeLoad<uint32_t>(in + 19) << 14, SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 30 | SafeLoad<uint32_t>(in + 20) << 2, SafeLoad<uint32_t>(in + 20) >> 20 | SafeLoad<uint32_t>(in + 21) << 12, SafeLoad<uint32_t>(in + 21) };
shifts = simd_batch{ 0, 0, 0, 2, 0, 0, 4, 0, 0, 6, 0, 0, 8, 0, 0, 10 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 22;
return in;
}
inline static const uint32_t* unpack23_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x7fffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 23-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 23 | SafeLoad<uint32_t>(in + 1) << 9, SafeLoad<uint32_t>(in + 1) >> 14 | SafeLoad<uint32_t>(in + 2) << 18, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 2) >> 28 | SafeLoad<uint32_t>(in + 3) << 4, SafeLoad<uint32_t>(in + 3) >> 19 | SafeLoad<uint32_t>(in + 4) << 13, SafeLoad<uint32_t>(in + 4) >> 10 | SafeLoad<uint32_t>(in + 5) << 22, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 24 | SafeLoad<uint32_t>(in + 6) << 8, SafeLoad<uint32_t>(in + 6) >> 15 | SafeLoad<uint32_t>(in + 7) << 17, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 29 | SafeLoad<uint32_t>(in + 8) << 3, SafeLoad<uint32_t>(in + 8) >> 20 | SafeLoad<uint32_t>(in + 9) << 12, SafeLoad<uint32_t>(in + 9) >> 11 | SafeLoad<uint32_t>(in + 10) << 21, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 25 | SafeLoad<uint32_t>(in + 11) << 7 };
shifts = simd_batch{ 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 6, 0, 0, 0, 2, 0 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 23-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 11) >> 16 | SafeLoad<uint32_t>(in + 12) << 16, SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 30 | SafeLoad<uint32_t>(in + 13) << 2, SafeLoad<uint32_t>(in + 13) >> 21 | SafeLoad<uint32_t>(in + 14) << 11, SafeLoad<uint32_t>(in + 14) >> 12 | SafeLoad<uint32_t>(in + 15) << 20, SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 26 | SafeLoad<uint32_t>(in + 16) << 6, SafeLoad<uint32_t>(in + 16) >> 17 | SafeLoad<uint32_t>(in + 17) << 15, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 31 | SafeLoad<uint32_t>(in + 18) << 1, SafeLoad<uint32_t>(in + 18) >> 22 | SafeLoad<uint32_t>(in + 19) << 10, SafeLoad<uint32_t>(in + 19) >> 13 | SafeLoad<uint32_t>(in + 20) << 19, SafeLoad<uint32_t>(in + 20), SafeLoad<uint32_t>(in + 20) >> 27 | SafeLoad<uint32_t>(in + 21) << 5, SafeLoad<uint32_t>(in + 21) >> 18 | SafeLoad<uint32_t>(in + 22) << 14, SafeLoad<uint32_t>(in + 22) };
shifts = simd_batch{ 0, 7, 0, 0, 0, 3, 0, 0, 8, 0, 0, 0, 4, 0, 0, 9 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 23;
return in;
}
inline static const uint32_t* unpack24_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0xffffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 24-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 24 | SafeLoad<uint32_t>(in + 1) << 8, SafeLoad<uint32_t>(in + 1) >> 16 | SafeLoad<uint32_t>(in + 2) << 16, SafeLoad<uint32_t>(in + 2), SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4) >> 16 | SafeLoad<uint32_t>(in + 5) << 16, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11) };
shifts = simd_batch{ 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 8 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 24-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 12), SafeLoad<uint32_t>(in + 12) >> 24 | SafeLoad<uint32_t>(in + 13) << 8, SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 16 | SafeLoad<uint32_t>(in + 17) << 16, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 18), SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 16 | SafeLoad<uint32_t>(in + 20) << 16, SafeLoad<uint32_t>(in + 20), SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 24 | SafeLoad<uint32_t>(in + 22) << 8, SafeLoad<uint32_t>(in + 22) >> 16 | SafeLoad<uint32_t>(in + 23) << 16, SafeLoad<uint32_t>(in + 23) };
shifts = simd_batch{ 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 8 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 24;
return in;
}
inline static const uint32_t* unpack25_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x1ffffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 25-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 25 | SafeLoad<uint32_t>(in + 1) << 7, SafeLoad<uint32_t>(in + 1) >> 18 | SafeLoad<uint32_t>(in + 2) << 14, SafeLoad<uint32_t>(in + 2) >> 11 | SafeLoad<uint32_t>(in + 3) << 21, SafeLoad<uint32_t>(in + 3), SafeLoad<uint32_t>(in + 3) >> 29 | SafeLoad<uint32_t>(in + 4) << 3, SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5) >> 15 | SafeLoad<uint32_t>(in + 6) << 17, SafeLoad<uint32_t>(in + 6) >> 8 | SafeLoad<uint32_t>(in + 7) << 24, SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 26 | SafeLoad<uint32_t>(in + 8) << 6, SafeLoad<uint32_t>(in + 8) >> 19 | SafeLoad<uint32_t>(in + 9) << 13, SafeLoad<uint32_t>(in + 9) >> 12 | SafeLoad<uint32_t>(in + 10) << 20, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 30 | SafeLoad<uint32_t>(in + 11) << 2, SafeLoad<uint32_t>(in + 11) >> 23 | SafeLoad<uint32_t>(in + 12) << 9 };
shifts = simd_batch{ 0, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0, 5, 0, 0 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 25-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 12) >> 16 | SafeLoad<uint32_t>(in + 13) << 16, SafeLoad<uint32_t>(in + 13) >> 9 | SafeLoad<uint32_t>(in + 14) << 23, SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 27 | SafeLoad<uint32_t>(in + 15) << 5, SafeLoad<uint32_t>(in + 15) >> 20 | SafeLoad<uint32_t>(in + 16) << 12, SafeLoad<uint32_t>(in + 16) >> 13 | SafeLoad<uint32_t>(in + 17) << 19, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 31 | SafeLoad<uint32_t>(in + 18) << 1, SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 17 | SafeLoad<uint32_t>(in + 20) << 15, SafeLoad<uint32_t>(in + 20) >> 10 | SafeLoad<uint32_t>(in + 21) << 22, SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 28 | SafeLoad<uint32_t>(in + 22) << 4, SafeLoad<uint32_t>(in + 22) >> 21 | SafeLoad<uint32_t>(in + 23) << 11, SafeLoad<uint32_t>(in + 23) >> 14 | SafeLoad<uint32_t>(in + 24) << 18, SafeLoad<uint32_t>(in + 24) };
shifts = simd_batch{ 0, 0, 2, 0, 0, 0, 6, 0, 0, 0, 0, 3, 0, 0, 0, 7 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 25;
return in;
}
inline static const uint32_t* unpack26_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x3ffffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 26-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 26 | SafeLoad<uint32_t>(in + 1) << 6, SafeLoad<uint32_t>(in + 1) >> 20 | SafeLoad<uint32_t>(in + 2) << 12, SafeLoad<uint32_t>(in + 2) >> 14 | SafeLoad<uint32_t>(in + 3) << 18, SafeLoad<uint32_t>(in + 3) >> 8 | SafeLoad<uint32_t>(in + 4) << 24, SafeLoad<uint32_t>(in + 4), SafeLoad<uint32_t>(in + 4) >> 28 | SafeLoad<uint32_t>(in + 5) << 4, SafeLoad<uint32_t>(in + 5) >> 22 | SafeLoad<uint32_t>(in + 6) << 10, SafeLoad<uint32_t>(in + 6) >> 16 | SafeLoad<uint32_t>(in + 7) << 16, SafeLoad<uint32_t>(in + 7) >> 10 | SafeLoad<uint32_t>(in + 8) << 22, SafeLoad<uint32_t>(in + 8), SafeLoad<uint32_t>(in + 8) >> 30 | SafeLoad<uint32_t>(in + 9) << 2, SafeLoad<uint32_t>(in + 9) >> 24 | SafeLoad<uint32_t>(in + 10) << 8, SafeLoad<uint32_t>(in + 10) >> 18 | SafeLoad<uint32_t>(in + 11) << 14, SafeLoad<uint32_t>(in + 11) >> 12 | SafeLoad<uint32_t>(in + 12) << 20, SafeLoad<uint32_t>(in + 12) };
shifts = simd_batch{ 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 6 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 26-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 13), SafeLoad<uint32_t>(in + 13) >> 26 | SafeLoad<uint32_t>(in + 14) << 6, SafeLoad<uint32_t>(in + 14) >> 20 | SafeLoad<uint32_t>(in + 15) << 12, SafeLoad<uint32_t>(in + 15) >> 14 | SafeLoad<uint32_t>(in + 16) << 18, SafeLoad<uint32_t>(in + 16) >> 8 | SafeLoad<uint32_t>(in + 17) << 24, SafeLoad<uint32_t>(in + 17), SafeLoad<uint32_t>(in + 17) >> 28 | SafeLoad<uint32_t>(in + 18) << 4, SafeLoad<uint32_t>(in + 18) >> 22 | SafeLoad<uint32_t>(in + 19) << 10, SafeLoad<uint32_t>(in + 19) >> 16 | SafeLoad<uint32_t>(in + 20) << 16, SafeLoad<uint32_t>(in + 20) >> 10 | SafeLoad<uint32_t>(in + 21) << 22, SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 30 | SafeLoad<uint32_t>(in + 22) << 2, SafeLoad<uint32_t>(in + 22) >> 24 | SafeLoad<uint32_t>(in + 23) << 8, SafeLoad<uint32_t>(in + 23) >> 18 | SafeLoad<uint32_t>(in + 24) << 14, SafeLoad<uint32_t>(in + 24) >> 12 | SafeLoad<uint32_t>(in + 25) << 20, SafeLoad<uint32_t>(in + 25) };
shifts = simd_batch{ 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 0, 6 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 26;
return in;
}
inline static const uint32_t* unpack27_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x7ffffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 27-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 27 | SafeLoad<uint32_t>(in + 1) << 5, SafeLoad<uint32_t>(in + 1) >> 22 | SafeLoad<uint32_t>(in + 2) << 10, SafeLoad<uint32_t>(in + 2) >> 17 | SafeLoad<uint32_t>(in + 3) << 15, SafeLoad<uint32_t>(in + 3) >> 12 | SafeLoad<uint32_t>(in + 4) << 20, SafeLoad<uint32_t>(in + 4) >> 7 | SafeLoad<uint32_t>(in + 5) << 25, SafeLoad<uint32_t>(in + 5), SafeLoad<uint32_t>(in + 5) >> 29 | SafeLoad<uint32_t>(in + 6) << 3, SafeLoad<uint32_t>(in + 6) >> 24 | SafeLoad<uint32_t>(in + 7) << 8, SafeLoad<uint32_t>(in + 7) >> 19 | SafeLoad<uint32_t>(in + 8) << 13, SafeLoad<uint32_t>(in + 8) >> 14 | SafeLoad<uint32_t>(in + 9) << 18, SafeLoad<uint32_t>(in + 9) >> 9 | SafeLoad<uint32_t>(in + 10) << 23, SafeLoad<uint32_t>(in + 10), SafeLoad<uint32_t>(in + 10) >> 31 | SafeLoad<uint32_t>(in + 11) << 1, SafeLoad<uint32_t>(in + 11) >> 26 | SafeLoad<uint32_t>(in + 12) << 6, SafeLoad<uint32_t>(in + 12) >> 21 | SafeLoad<uint32_t>(in + 13) << 11 };
shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 4, 0, 0, 0 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 27-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 13) >> 16 | SafeLoad<uint32_t>(in + 14) << 16, SafeLoad<uint32_t>(in + 14) >> 11 | SafeLoad<uint32_t>(in + 15) << 21, SafeLoad<uint32_t>(in + 15) >> 6 | SafeLoad<uint32_t>(in + 16) << 26, SafeLoad<uint32_t>(in + 16), SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) >> 23 | SafeLoad<uint32_t>(in + 18) << 9, SafeLoad<uint32_t>(in + 18) >> 18 | SafeLoad<uint32_t>(in + 19) << 14, SafeLoad<uint32_t>(in + 19) >> 13 | SafeLoad<uint32_t>(in + 20) << 19, SafeLoad<uint32_t>(in + 20) >> 8 | SafeLoad<uint32_t>(in + 21) << 24, SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 30 | SafeLoad<uint32_t>(in + 22) << 2, SafeLoad<uint32_t>(in + 22) >> 25 | SafeLoad<uint32_t>(in + 23) << 7, SafeLoad<uint32_t>(in + 23) >> 20 | SafeLoad<uint32_t>(in + 24) << 12, SafeLoad<uint32_t>(in + 24) >> 15 | SafeLoad<uint32_t>(in + 25) << 17, SafeLoad<uint32_t>(in + 25) >> 10 | SafeLoad<uint32_t>(in + 26) << 22, SafeLoad<uint32_t>(in + 26) };
shifts = simd_batch{ 0, 0, 0, 1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 5 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 27;
return in;
}
inline static const uint32_t* unpack28_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0xfffffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 28-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 28 | SafeLoad<uint32_t>(in + 1) << 4, SafeLoad<uint32_t>(in + 1) >> 24 | SafeLoad<uint32_t>(in + 2) << 8, SafeLoad<uint32_t>(in + 2) >> 20 | SafeLoad<uint32_t>(in + 3) << 12, SafeLoad<uint32_t>(in + 3) >> 16 | SafeLoad<uint32_t>(in + 4) << 16, SafeLoad<uint32_t>(in + 4) >> 12 | SafeLoad<uint32_t>(in + 5) << 20, SafeLoad<uint32_t>(in + 5) >> 8 | SafeLoad<uint32_t>(in + 6) << 24, SafeLoad<uint32_t>(in + 6), SafeLoad<uint32_t>(in + 7), SafeLoad<uint32_t>(in + 7) >> 28 | SafeLoad<uint32_t>(in + 8) << 4, SafeLoad<uint32_t>(in + 8) >> 24 | SafeLoad<uint32_t>(in + 9) << 8, SafeLoad<uint32_t>(in + 9) >> 20 | SafeLoad<uint32_t>(in + 10) << 12, SafeLoad<uint32_t>(in + 10) >> 16 | SafeLoad<uint32_t>(in + 11) << 16, SafeLoad<uint32_t>(in + 11) >> 12 | SafeLoad<uint32_t>(in + 12) << 20, SafeLoad<uint32_t>(in + 12) >> 8 | SafeLoad<uint32_t>(in + 13) << 24, SafeLoad<uint32_t>(in + 13) };
shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 4 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 28-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 14), SafeLoad<uint32_t>(in + 14) >> 28 | SafeLoad<uint32_t>(in + 15) << 4, SafeLoad<uint32_t>(in + 15) >> 24 | SafeLoad<uint32_t>(in + 16) << 8, SafeLoad<uint32_t>(in + 16) >> 20 | SafeLoad<uint32_t>(in + 17) << 12, SafeLoad<uint32_t>(in + 17) >> 16 | SafeLoad<uint32_t>(in + 18) << 16, SafeLoad<uint32_t>(in + 18) >> 12 | SafeLoad<uint32_t>(in + 19) << 20, SafeLoad<uint32_t>(in + 19) >> 8 | SafeLoad<uint32_t>(in + 20) << 24, SafeLoad<uint32_t>(in + 20), SafeLoad<uint32_t>(in + 21), SafeLoad<uint32_t>(in + 21) >> 28 | SafeLoad<uint32_t>(in + 22) << 4, SafeLoad<uint32_t>(in + 22) >> 24 | SafeLoad<uint32_t>(in + 23) << 8, SafeLoad<uint32_t>(in + 23) >> 20 | SafeLoad<uint32_t>(in + 24) << 12, SafeLoad<uint32_t>(in + 24) >> 16 | SafeLoad<uint32_t>(in + 25) << 16, SafeLoad<uint32_t>(in + 25) >> 12 | SafeLoad<uint32_t>(in + 26) << 20, SafeLoad<uint32_t>(in + 26) >> 8 | SafeLoad<uint32_t>(in + 27) << 24, SafeLoad<uint32_t>(in + 27) };
shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 4 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 28;
return in;
}
inline static const uint32_t* unpack29_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x1fffffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 29-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 29 | SafeLoad<uint32_t>(in + 1) << 3, SafeLoad<uint32_t>(in + 1) >> 26 | SafeLoad<uint32_t>(in + 2) << 6, SafeLoad<uint32_t>(in + 2) >> 23 | SafeLoad<uint32_t>(in + 3) << 9, SafeLoad<uint32_t>(in + 3) >> 20 | SafeLoad<uint32_t>(in + 4) << 12, SafeLoad<uint32_t>(in + 4) >> 17 | SafeLoad<uint32_t>(in + 5) << 15, SafeLoad<uint32_t>(in + 5) >> 14 | SafeLoad<uint32_t>(in + 6) << 18, SafeLoad<uint32_t>(in + 6) >> 11 | SafeLoad<uint32_t>(in + 7) << 21, SafeLoad<uint32_t>(in + 7) >> 8 | SafeLoad<uint32_t>(in + 8) << 24, SafeLoad<uint32_t>(in + 8) >> 5 | SafeLoad<uint32_t>(in + 9) << 27, SafeLoad<uint32_t>(in + 9), SafeLoad<uint32_t>(in + 9) >> 31 | SafeLoad<uint32_t>(in + 10) << 1, SafeLoad<uint32_t>(in + 10) >> 28 | SafeLoad<uint32_t>(in + 11) << 4, SafeLoad<uint32_t>(in + 11) >> 25 | SafeLoad<uint32_t>(in + 12) << 7, SafeLoad<uint32_t>(in + 12) >> 22 | SafeLoad<uint32_t>(in + 13) << 10, SafeLoad<uint32_t>(in + 13) >> 19 | SafeLoad<uint32_t>(in + 14) << 13 };
shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 29-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 14) >> 16 | SafeLoad<uint32_t>(in + 15) << 16, SafeLoad<uint32_t>(in + 15) >> 13 | SafeLoad<uint32_t>(in + 16) << 19, SafeLoad<uint32_t>(in + 16) >> 10 | SafeLoad<uint32_t>(in + 17) << 22, SafeLoad<uint32_t>(in + 17) >> 7 | SafeLoad<uint32_t>(in + 18) << 25, SafeLoad<uint32_t>(in + 18) >> 4 | SafeLoad<uint32_t>(in + 19) << 28, SafeLoad<uint32_t>(in + 19), SafeLoad<uint32_t>(in + 19) >> 30 | SafeLoad<uint32_t>(in + 20) << 2, SafeLoad<uint32_t>(in + 20) >> 27 | SafeLoad<uint32_t>(in + 21) << 5, SafeLoad<uint32_t>(in + 21) >> 24 | SafeLoad<uint32_t>(in + 22) << 8, SafeLoad<uint32_t>(in + 22) >> 21 | SafeLoad<uint32_t>(in + 23) << 11, SafeLoad<uint32_t>(in + 23) >> 18 | SafeLoad<uint32_t>(in + 24) << 14, SafeLoad<uint32_t>(in + 24) >> 15 | SafeLoad<uint32_t>(in + 25) << 17, SafeLoad<uint32_t>(in + 25) >> 12 | SafeLoad<uint32_t>(in + 26) << 20, SafeLoad<uint32_t>(in + 26) >> 9 | SafeLoad<uint32_t>(in + 27) << 23, SafeLoad<uint32_t>(in + 27) >> 6 | SafeLoad<uint32_t>(in + 28) << 26, SafeLoad<uint32_t>(in + 28) };
shifts = simd_batch{ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 29;
return in;
}
inline static const uint32_t* unpack30_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x3fffffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 30-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 30 | SafeLoad<uint32_t>(in + 1) << 2, SafeLoad<uint32_t>(in + 1) >> 28 | SafeLoad<uint32_t>(in + 2) << 4, SafeLoad<uint32_t>(in + 2) >> 26 | SafeLoad<uint32_t>(in + 3) << 6, SafeLoad<uint32_t>(in + 3) >> 24 | SafeLoad<uint32_t>(in + 4) << 8, SafeLoad<uint32_t>(in + 4) >> 22 | SafeLoad<uint32_t>(in + 5) << 10, SafeLoad<uint32_t>(in + 5) >> 20 | SafeLoad<uint32_t>(in + 6) << 12, SafeLoad<uint32_t>(in + 6) >> 18 | SafeLoad<uint32_t>(in + 7) << 14, SafeLoad<uint32_t>(in + 7) >> 16 | SafeLoad<uint32_t>(in + 8) << 16, SafeLoad<uint32_t>(in + 8) >> 14 | SafeLoad<uint32_t>(in + 9) << 18, SafeLoad<uint32_t>(in + 9) >> 12 | SafeLoad<uint32_t>(in + 10) << 20, SafeLoad<uint32_t>(in + 10) >> 10 | SafeLoad<uint32_t>(in + 11) << 22, SafeLoad<uint32_t>(in + 11) >> 8 | SafeLoad<uint32_t>(in + 12) << 24, SafeLoad<uint32_t>(in + 12) >> 6 | SafeLoad<uint32_t>(in + 13) << 26, SafeLoad<uint32_t>(in + 13) >> 4 | SafeLoad<uint32_t>(in + 14) << 28, SafeLoad<uint32_t>(in + 14) };
shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 30-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 15), SafeLoad<uint32_t>(in + 15) >> 30 | SafeLoad<uint32_t>(in + 16) << 2, SafeLoad<uint32_t>(in + 16) >> 28 | SafeLoad<uint32_t>(in + 17) << 4, SafeLoad<uint32_t>(in + 17) >> 26 | SafeLoad<uint32_t>(in + 18) << 6, SafeLoad<uint32_t>(in + 18) >> 24 | SafeLoad<uint32_t>(in + 19) << 8, SafeLoad<uint32_t>(in + 19) >> 22 | SafeLoad<uint32_t>(in + 20) << 10, SafeLoad<uint32_t>(in + 20) >> 20 | SafeLoad<uint32_t>(in + 21) << 12, SafeLoad<uint32_t>(in + 21) >> 18 | SafeLoad<uint32_t>(in + 22) << 14, SafeLoad<uint32_t>(in + 22) >> 16 | SafeLoad<uint32_t>(in + 23) << 16, SafeLoad<uint32_t>(in + 23) >> 14 | SafeLoad<uint32_t>(in + 24) << 18, SafeLoad<uint32_t>(in + 24) >> 12 | SafeLoad<uint32_t>(in + 25) << 20, SafeLoad<uint32_t>(in + 25) >> 10 | SafeLoad<uint32_t>(in + 26) << 22, SafeLoad<uint32_t>(in + 26) >> 8 | SafeLoad<uint32_t>(in + 27) << 24, SafeLoad<uint32_t>(in + 27) >> 6 | SafeLoad<uint32_t>(in + 28) << 26, SafeLoad<uint32_t>(in + 28) >> 4 | SafeLoad<uint32_t>(in + 29) << 28, SafeLoad<uint32_t>(in + 29) };
shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 30;
return in;
}
inline static const uint32_t* unpack31_32(const uint32_t* in, uint32_t* out) {
uint32_t mask = 0x7fffffff;
simd_batch masks(mask);
simd_batch words, shifts;
simd_batch results;
// extract 31-bit bundles 0 to 15
words = simd_batch{ SafeLoad<uint32_t>(in + 0), SafeLoad<uint32_t>(in + 0) >> 31 | SafeLoad<uint32_t>(in + 1) << 1, SafeLoad<uint32_t>(in + 1) >> 30 | SafeLoad<uint32_t>(in + 2) << 2, SafeLoad<uint32_t>(in + 2) >> 29 | SafeLoad<uint32_t>(in + 3) << 3, SafeLoad<uint32_t>(in + 3) >> 28 | SafeLoad<uint32_t>(in + 4) << 4, SafeLoad<uint32_t>(in + 4) >> 27 | SafeLoad<uint32_t>(in + 5) << 5, SafeLoad<uint32_t>(in + 5) >> 26 | SafeLoad<uint32_t>(in + 6) << 6, SafeLoad<uint32_t>(in + 6) >> 25 | SafeLoad<uint32_t>(in + 7) << 7, SafeLoad<uint32_t>(in + 7) >> 24 | SafeLoad<uint32_t>(in + 8) << 8, SafeLoad<uint32_t>(in + 8) >> 23 | SafeLoad<uint32_t>(in + 9) << 9, SafeLoad<uint32_t>(in + 9) >> 22 | SafeLoad<uint32_t>(in + 10) << 10, SafeLoad<uint32_t>(in + 10) >> 21 | SafeLoad<uint32_t>(in + 11) << 11, SafeLoad<uint32_t>(in + 11) >> 20 | SafeLoad<uint32_t>(in + 12) << 12, SafeLoad<uint32_t>(in + 12) >> 19 | SafeLoad<uint32_t>(in + 13) << 13, SafeLoad<uint32_t>(in + 13) >> 18 | SafeLoad<uint32_t>(in + 14) << 14, SafeLoad<uint32_t>(in + 14) >> 17 | SafeLoad<uint32_t>(in + 15) << 15 };
shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
// extract 31-bit bundles 16 to 31
words = simd_batch{ SafeLoad<uint32_t>(in + 15) >> 16 | SafeLoad<uint32_t>(in + 16) << 16, SafeLoad<uint32_t>(in + 16) >> 15 | SafeLoad<uint32_t>(in + 17) << 17, SafeLoad<uint32_t>(in + 17) >> 14 | SafeLoad<uint32_t>(in + 18) << 18, SafeLoad<uint32_t>(in + 18) >> 13 | SafeLoad<uint32_t>(in + 19) << 19, SafeLoad<uint32_t>(in + 19) >> 12 | SafeLoad<uint32_t>(in + 20) << 20, SafeLoad<uint32_t>(in + 20) >> 11 | SafeLoad<uint32_t>(in + 21) << 21, SafeLoad<uint32_t>(in + 21) >> 10 | SafeLoad<uint32_t>(in + 22) << 22, SafeLoad<uint32_t>(in + 22) >> 9 | SafeLoad<uint32_t>(in + 23) << 23, SafeLoad<uint32_t>(in + 23) >> 8 | SafeLoad<uint32_t>(in + 24) << 24, SafeLoad<uint32_t>(in + 24) >> 7 | SafeLoad<uint32_t>(in + 25) << 25, SafeLoad<uint32_t>(in + 25) >> 6 | SafeLoad<uint32_t>(in + 26) << 26, SafeLoad<uint32_t>(in + 26) >> 5 | SafeLoad<uint32_t>(in + 27) << 27, SafeLoad<uint32_t>(in + 27) >> 4 | SafeLoad<uint32_t>(in + 28) << 28, SafeLoad<uint32_t>(in + 28) >> 3 | SafeLoad<uint32_t>(in + 29) << 29, SafeLoad<uint32_t>(in + 29) >> 2 | SafeLoad<uint32_t>(in + 30) << 30, SafeLoad<uint32_t>(in + 30) };
shifts = simd_batch{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
results = (words >> shifts) & masks;
results.store_unaligned(out);
out += 16;
in += 31;
return in;
}
inline static const uint32_t* unpack32_32(const uint32_t* in, uint32_t* out) {
memcpy(out, in, 32 * sizeof(*out));
in += 32;
out += 32;
return in;
}
}; // struct UnpackBits512
} // namespace
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,88 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include "arrow/type_fwd.h"
namespace arrow {
namespace util {
/// \brief The sum of bytes in each buffer referenced by the array
///
/// Note: An array may only reference a portion of a buffer.
/// This method will overestimate in this case and return the
/// byte size of the entire buffer.
/// Note: If a buffer is referenced multiple times then it will
/// only be counted once.
ARROW_EXPORT int64_t TotalBufferSize(const ArrayData& array_data);
/// \brief The sum of bytes in each buffer referenced by the array
/// \see TotalBufferSize(const ArrayData& array_data) for details
ARROW_EXPORT int64_t TotalBufferSize(const Array& array);
/// \brief The sum of bytes in each buffer referenced by the array
/// \see TotalBufferSize(const ArrayData& array_data) for details
ARROW_EXPORT int64_t TotalBufferSize(const ChunkedArray& chunked_array);
/// \brief The sum of bytes in each buffer referenced by the batch
/// \see TotalBufferSize(const ArrayData& array_data) for details
ARROW_EXPORT int64_t TotalBufferSize(const RecordBatch& record_batch);
/// \brief The sum of bytes in each buffer referenced by the table
/// \see TotalBufferSize(const ArrayData& array_data) for details
ARROW_EXPORT int64_t TotalBufferSize(const Table& table);
/// \brief Calculate the buffer ranges referenced by the array
///
/// These ranges will take into account array offsets
///
/// The ranges may contain duplicates
///
/// Dictionary arrays will ignore the offset of their containing array
///
/// The return value will be a struct array corresponding to the schema:
/// schema({field("start", uint64()), field("offset", uint64()), field("length",
/// uint64())})
ARROW_EXPORT Result<std::shared_ptr<Array>> ReferencedRanges(const ArrayData& array_data);
/// \brief Returns the sum of bytes from all buffer ranges referenced
///
/// Unlike TotalBufferSize this method will account for array
/// offsets.
///
/// If buffers are shared between arrays then the shared
/// portion will be counted multiple times.
///
/// Dictionary arrays will always be counted in their entirety
/// even if the array only references a portion of the dictionary.
ARROW_EXPORT Result<int64_t> ReferencedBufferSize(const ArrayData& array_data);
/// \brief Returns the sum of bytes from all buffer ranges referenced
/// \see ReferencedBufferSize(const ArrayData& array_data) for details
ARROW_EXPORT Result<int64_t> ReferencedBufferSize(const Array& array_data);
/// \brief Returns the sum of bytes from all buffer ranges referenced
/// \see ReferencedBufferSize(const ArrayData& array_data) for details
ARROW_EXPORT Result<int64_t> ReferencedBufferSize(const ChunkedArray& array_data);
/// \brief Returns the sum of bytes from all buffer ranges referenced
/// \see ReferencedBufferSize(const ArrayData& array_data) for details
ARROW_EXPORT Result<int64_t> ReferencedBufferSize(const RecordBatch& array_data);
/// \brief Returns the sum of bytes from all buffer ranges referenced
/// \see ReferencedBufferSize(const ArrayData& array_data) for details
ARROW_EXPORT Result<int64_t> ReferencedBufferSize(const Table& array_data);
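/// For illustration, a hedged usage sketch contrasting the two measures on a
/// sliced array (the variable names are placeholders, not part of this header):
///
///   std::shared_ptr<Array> arr = ...;                   // e.g. 1000 int32 values
///   std::shared_ptr<Array> slice = arr->Slice(10, 100);
///   int64_t total = TotalBufferSize(*slice);            // counts the whole buffers
///   int64_t used = ReferencedBufferSize(*slice).ValueOrDie();  // ~100 values' worth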
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,626 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/util/simd.h"
#include "arrow/util/ubsan.h"
#include <stdint.h>
#include <algorithm>
#ifdef ARROW_HAVE_SSE4_2
// Enable the SIMD for ByteStreamSplit Encoder/Decoder
#define ARROW_HAVE_SIMD_SPLIT
#endif // ARROW_HAVE_SSE4_2
namespace arrow {
namespace util {
namespace internal {
#if defined(ARROW_HAVE_SSE4_2)
template <typename T>
void ByteStreamSplitDecodeSse2(const uint8_t* data, int64_t num_values, int64_t stride,
T* out) {
constexpr size_t kNumStreams = sizeof(T);
static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
constexpr size_t kNumStreamsLog2 = (kNumStreams == 8U ? 3U : 2U);
const int64_t size = num_values * sizeof(T);
constexpr int64_t kBlockSize = sizeof(__m128i) * kNumStreams;
const int64_t num_blocks = size / kBlockSize;
uint8_t* output_data = reinterpret_cast<uint8_t*>(out);
// First handle suffix.
// Handling the suffix first helps catch cases where the SIMD-based processing
// overflows into the suffix, since a test would then almost surely fail.
const int64_t num_processed_elements = (num_blocks * kBlockSize) / kNumStreams;
for (int64_t i = num_processed_elements; i < num_values; ++i) {
uint8_t gathered_byte_data[kNumStreams];
for (size_t b = 0; b < kNumStreams; ++b) {
const size_t byte_index = b * stride + i;
gathered_byte_data[b] = data[byte_index];
}
out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
}
// The blocks get processed hierarchically using the unpack intrinsics.
// Example with four streams:
// Stage 1: AAAA BBBB CCCC DDDD
// Stage 2: ACAC ACAC BDBD BDBD
// Stage 3: ABCD ABCD ABCD ABCD
__m128i stage[kNumStreamsLog2 + 1U][kNumStreams];
constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
for (int64_t i = 0; i < num_blocks; ++i) {
for (size_t j = 0; j < kNumStreams; ++j) {
stage[0][j] = _mm_loadu_si128(
reinterpret_cast<const __m128i*>(&data[i * sizeof(__m128i) + j * stride]));
}
for (size_t step = 0; step < kNumStreamsLog2; ++step) {
for (size_t j = 0; j < kNumStreamsHalf; ++j) {
stage[step + 1U][j * 2] =
_mm_unpacklo_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
stage[step + 1U][j * 2 + 1U] =
_mm_unpackhi_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
}
}
for (size_t j = 0; j < kNumStreams; ++j) {
_mm_storeu_si128(reinterpret_cast<__m128i*>(
&output_data[(i * kNumStreams + j) * sizeof(__m128i)]),
stage[kNumStreamsLog2][j]);
}
}
}
template <typename T>
void ByteStreamSplitEncodeSse2(const uint8_t* raw_values, const size_t num_values,
uint8_t* output_buffer_raw) {
constexpr size_t kNumStreams = sizeof(T);
static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
__m128i stage[3][kNumStreams];
__m128i final_result[kNumStreams];
const size_t size = num_values * sizeof(T);
constexpr size_t kBlockSize = sizeof(__m128i) * kNumStreams;
const size_t num_blocks = size / kBlockSize;
const __m128i* raw_values_sse = reinterpret_cast<const __m128i*>(raw_values);
__m128i* output_buffer_streams[kNumStreams];
for (size_t i = 0; i < kNumStreams; ++i) {
output_buffer_streams[i] =
reinterpret_cast<__m128i*>(&output_buffer_raw[num_values * i]);
}
// First handle suffix.
const size_t num_processed_elements = (num_blocks * kBlockSize) / sizeof(T);
for (size_t i = num_processed_elements; i < num_values; ++i) {
for (size_t j = 0U; j < kNumStreams; ++j) {
const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
output_buffer_raw[j * num_values + i] = byte_in_value;
}
}
// The current shuffling algorithm diverges for float and double types but the compiler
// should be able to remove the branch since only one path is taken for each template
// instantiation.
// Example run for floats:
// Step 0, copy:
// 0: ABCD ABCD ABCD ABCD 1: ABCD ABCD ABCD ABCD ...
// Step 1: _mm_unpacklo_epi8 and _mm_unpackhi_epi8:
// 0: AABB CCDD AABB CCDD 1: AABB CCDD AABB CCDD ...
// Step 2: _mm_unpacklo_epi8 and _mm_unpackhi_epi8:
// 0: AAAA BBBB CCCC DDDD 1: AAAA BBBB CCCC DDDD ...
// Step 3: _mm_unpacklo_epi8 and _mm_unpackhi_epi8:
// 0: AAAA AAAA BBBB BBBB 1: CCCC CCCC DDDD DDDD ...
// Step 4: _mm_unpacklo_epi64 and _mm_unpackhi_epi64:
// 0: AAAA AAAA AAAA AAAA 1: BBBB BBBB BBBB BBBB ...
for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
// First copy the data to stage 0.
for (size_t i = 0; i < kNumStreams; ++i) {
stage[0][i] = _mm_loadu_si128(&raw_values_sse[block_index * kNumStreams + i]);
}
// The shuffling of bytes is performed through the unpack intrinsics.
// In my measurements this gives better performance than an implementation
// which uses the shuffle intrinsics.
for (size_t stage_lvl = 0; stage_lvl < 2U; ++stage_lvl) {
for (size_t i = 0; i < kNumStreams / 2U; ++i) {
stage[stage_lvl + 1][i * 2] =
_mm_unpacklo_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
stage[stage_lvl + 1][i * 2 + 1] =
_mm_unpackhi_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
}
}
if (kNumStreams == 8U) {
// This is the path for double.
__m128i tmp[8];
for (size_t i = 0; i < 4; ++i) {
tmp[i * 2] = _mm_unpacklo_epi32(stage[2][i], stage[2][i + 4]);
tmp[i * 2 + 1] = _mm_unpackhi_epi32(stage[2][i], stage[2][i + 4]);
}
for (size_t i = 0; i < 4; ++i) {
final_result[i * 2] = _mm_unpacklo_epi32(tmp[i], tmp[i + 4]);
final_result[i * 2 + 1] = _mm_unpackhi_epi32(tmp[i], tmp[i + 4]);
}
} else {
// this is the path for float.
__m128i tmp[4];
for (size_t i = 0; i < 2; ++i) {
tmp[i * 2] = _mm_unpacklo_epi8(stage[2][i * 2], stage[2][i * 2 + 1]);
tmp[i * 2 + 1] = _mm_unpackhi_epi8(stage[2][i * 2], stage[2][i * 2 + 1]);
}
for (size_t i = 0; i < 2; ++i) {
final_result[i * 2] = _mm_unpacklo_epi64(tmp[i], tmp[i + 2]);
final_result[i * 2 + 1] = _mm_unpackhi_epi64(tmp[i], tmp[i + 2]);
}
}
for (size_t i = 0; i < kNumStreams; ++i) {
_mm_storeu_si128(&output_buffer_streams[i][block_index], final_result[i]);
}
}
}
#endif // ARROW_HAVE_SSE4_2
#if defined(ARROW_HAVE_AVX2)
template <typename T>
void ByteStreamSplitDecodeAvx2(const uint8_t* data, int64_t num_values, int64_t stride,
T* out) {
constexpr size_t kNumStreams = sizeof(T);
static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
constexpr size_t kNumStreamsLog2 = (kNumStreams == 8U ? 3U : 2U);
const int64_t size = num_values * sizeof(T);
constexpr int64_t kBlockSize = sizeof(__m256i) * kNumStreams;
if (size < kBlockSize) // Back to SSE for small size
return ByteStreamSplitDecodeSse2(data, num_values, stride, out);
const int64_t num_blocks = size / kBlockSize;
uint8_t* output_data = reinterpret_cast<uint8_t*>(out);
// First handle suffix.
const int64_t num_processed_elements = (num_blocks * kBlockSize) / kNumStreams;
for (int64_t i = num_processed_elements; i < num_values; ++i) {
uint8_t gathered_byte_data[kNumStreams];
for (size_t b = 0; b < kNumStreams; ++b) {
const size_t byte_index = b * stride + i;
gathered_byte_data[b] = data[byte_index];
}
out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
}
// Processed hierarchically using unpack intrinsics, then permute intrinsics.
__m256i stage[kNumStreamsLog2 + 1U][kNumStreams];
__m256i final_result[kNumStreams];
constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
for (int64_t i = 0; i < num_blocks; ++i) {
for (size_t j = 0; j < kNumStreams; ++j) {
stage[0][j] = _mm256_loadu_si256(
reinterpret_cast<const __m256i*>(&data[i * sizeof(__m256i) + j * stride]));
}
for (size_t step = 0; step < kNumStreamsLog2; ++step) {
for (size_t j = 0; j < kNumStreamsHalf; ++j) {
stage[step + 1U][j * 2] =
_mm256_unpacklo_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
stage[step + 1U][j * 2 + 1U] =
_mm256_unpackhi_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
}
}
if (kNumStreams == 8U) {
// path for double, 128i index:
// {0x00, 0x08}, {0x01, 0x09}, {0x02, 0x0A}, {0x03, 0x0B},
// {0x04, 0x0C}, {0x05, 0x0D}, {0x06, 0x0E}, {0x07, 0x0F},
final_result[0] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
stage[kNumStreamsLog2][1], 0b00100000);
final_result[1] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
stage[kNumStreamsLog2][3], 0b00100000);
final_result[2] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][4],
stage[kNumStreamsLog2][5], 0b00100000);
final_result[3] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][6],
stage[kNumStreamsLog2][7], 0b00100000);
final_result[4] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
stage[kNumStreamsLog2][1], 0b00110001);
final_result[5] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
stage[kNumStreamsLog2][3], 0b00110001);
final_result[6] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][4],
stage[kNumStreamsLog2][5], 0b00110001);
final_result[7] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][6],
stage[kNumStreamsLog2][7], 0b00110001);
} else {
// path for float, 128i index:
// {0x00, 0x04}, {0x01, 0x05}, {0x02, 0x06}, {0x03, 0x07}
final_result[0] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
stage[kNumStreamsLog2][1], 0b00100000);
final_result[1] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
stage[kNumStreamsLog2][3], 0b00100000);
final_result[2] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][0],
stage[kNumStreamsLog2][1], 0b00110001);
final_result[3] = _mm256_permute2x128_si256(stage[kNumStreamsLog2][2],
stage[kNumStreamsLog2][3], 0b00110001);
}
for (size_t j = 0; j < kNumStreams; ++j) {
_mm256_storeu_si256(reinterpret_cast<__m256i*>(
&output_data[(i * kNumStreams + j) * sizeof(__m256i)]),
final_result[j]);
}
}
}
template <typename T>
void ByteStreamSplitEncodeAvx2(const uint8_t* raw_values, const size_t num_values,
uint8_t* output_buffer_raw) {
constexpr size_t kNumStreams = sizeof(T);
static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
if (kNumStreams == 8U) // Back to SSE, currently no path for double.
return ByteStreamSplitEncodeSse2<T>(raw_values, num_values, output_buffer_raw);
const size_t size = num_values * sizeof(T);
constexpr size_t kBlockSize = sizeof(__m256i) * kNumStreams;
if (size < kBlockSize) // Back to SSE for small size
return ByteStreamSplitEncodeSse2<T>(raw_values, num_values, output_buffer_raw);
const size_t num_blocks = size / kBlockSize;
const __m256i* raw_values_simd = reinterpret_cast<const __m256i*>(raw_values);
__m256i* output_buffer_streams[kNumStreams];
for (size_t i = 0; i < kNumStreams; ++i) {
output_buffer_streams[i] =
reinterpret_cast<__m256i*>(&output_buffer_raw[num_values * i]);
}
// First handle suffix.
const size_t num_processed_elements = (num_blocks * kBlockSize) / sizeof(T);
for (size_t i = num_processed_elements; i < num_values; ++i) {
for (size_t j = 0U; j < kNumStreams; ++j) {
const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
output_buffer_raw[j * num_values + i] = byte_in_value;
}
}
// Path for float.
// 1. Processed hierarchically to 32i blocks using the unpack intrinsics.
// 2. Pack 128i block using _mm256_permutevar8x32_epi32.
// 3. Pack final 256i block with _mm256_permute2x128_si256.
constexpr size_t kNumUnpack = 3U;
__m256i stage[kNumUnpack + 1][kNumStreams];
static const __m256i kPermuteMask =
_mm256_set_epi32(0x07, 0x03, 0x06, 0x02, 0x05, 0x01, 0x04, 0x00);
__m256i permute[kNumStreams];
__m256i final_result[kNumStreams];
for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
for (size_t i = 0; i < kNumStreams; ++i) {
stage[0][i] = _mm256_loadu_si256(&raw_values_simd[block_index * kNumStreams + i]);
}
for (size_t stage_lvl = 0; stage_lvl < kNumUnpack; ++stage_lvl) {
for (size_t i = 0; i < kNumStreams / 2U; ++i) {
stage[stage_lvl + 1][i * 2] =
_mm256_unpacklo_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
stage[stage_lvl + 1][i * 2 + 1] =
_mm256_unpackhi_epi8(stage[stage_lvl][i * 2], stage[stage_lvl][i * 2 + 1]);
}
}
for (size_t i = 0; i < kNumStreams; ++i) {
permute[i] = _mm256_permutevar8x32_epi32(stage[kNumUnpack][i], kPermuteMask);
}
final_result[0] = _mm256_permute2x128_si256(permute[0], permute[2], 0b00100000);
final_result[1] = _mm256_permute2x128_si256(permute[0], permute[2], 0b00110001);
final_result[2] = _mm256_permute2x128_si256(permute[1], permute[3], 0b00100000);
final_result[3] = _mm256_permute2x128_si256(permute[1], permute[3], 0b00110001);
for (size_t i = 0; i < kNumStreams; ++i) {
_mm256_storeu_si256(&output_buffer_streams[i][block_index], final_result[i]);
}
}
}
#endif // ARROW_HAVE_AVX2
#if defined(ARROW_HAVE_AVX512)
template <typename T>
void ByteStreamSplitDecodeAvx512(const uint8_t* data, int64_t num_values, int64_t stride,
T* out) {
constexpr size_t kNumStreams = sizeof(T);
static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
constexpr size_t kNumStreamsLog2 = (kNumStreams == 8U ? 3U : 2U);
const int64_t size = num_values * sizeof(T);
constexpr int64_t kBlockSize = sizeof(__m512i) * kNumStreams;
if (size < kBlockSize) // Back to AVX2 for small size
return ByteStreamSplitDecodeAvx2(data, num_values, stride, out);
const int64_t num_blocks = size / kBlockSize;
uint8_t* output_data = reinterpret_cast<uint8_t*>(out);
// First handle suffix.
const int64_t num_processed_elements = (num_blocks * kBlockSize) / kNumStreams;
for (int64_t i = num_processed_elements; i < num_values; ++i) {
uint8_t gathered_byte_data[kNumStreams];
for (size_t b = 0; b < kNumStreams; ++b) {
const size_t byte_index = b * stride + i;
gathered_byte_data[b] = data[byte_index];
}
out[i] = arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]);
}
// Processed hierarchically using the unpack intrinsics, then two shuffles.
__m512i stage[kNumStreamsLog2 + 1U][kNumStreams];
__m512i shuffle[kNumStreams];
__m512i final_result[kNumStreams];
constexpr size_t kNumStreamsHalf = kNumStreams / 2U;
for (int64_t i = 0; i < num_blocks; ++i) {
for (size_t j = 0; j < kNumStreams; ++j) {
stage[0][j] = _mm512_loadu_si512(
reinterpret_cast<const __m512i*>(&data[i * sizeof(__m512i) + j * stride]));
}
for (size_t step = 0; step < kNumStreamsLog2; ++step) {
for (size_t j = 0; j < kNumStreamsHalf; ++j) {
stage[step + 1U][j * 2] =
_mm512_unpacklo_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
stage[step + 1U][j * 2 + 1U] =
_mm512_unpackhi_epi8(stage[step][j], stage[step][kNumStreamsHalf + j]);
}
}
if (kNumStreams == 8U) {
// path for double, 128i index:
// {0x00, 0x04, 0x08, 0x0C}, {0x10, 0x14, 0x18, 0x1C},
// {0x01, 0x05, 0x09, 0x0D}, {0x11, 0x15, 0x19, 0x1D},
// {0x02, 0x06, 0x0A, 0x0E}, {0x12, 0x16, 0x1A, 0x1E},
// {0x03, 0x07, 0x0B, 0x0F}, {0x13, 0x17, 0x1B, 0x1F},
shuffle[0] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
stage[kNumStreamsLog2][1], 0b01000100);
shuffle[1] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
stage[kNumStreamsLog2][3], 0b01000100);
shuffle[2] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][4],
stage[kNumStreamsLog2][5], 0b01000100);
shuffle[3] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][6],
stage[kNumStreamsLog2][7], 0b01000100);
shuffle[4] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
stage[kNumStreamsLog2][1], 0b11101110);
shuffle[5] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
stage[kNumStreamsLog2][3], 0b11101110);
shuffle[6] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][4],
stage[kNumStreamsLog2][5], 0b11101110);
shuffle[7] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][6],
stage[kNumStreamsLog2][7], 0b11101110);
final_result[0] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b10001000);
final_result[1] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b10001000);
final_result[2] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b11011101);
final_result[3] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b11011101);
final_result[4] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b10001000);
final_result[5] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b10001000);
final_result[6] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b11011101);
final_result[7] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b11011101);
} else {
// path for float, 128i index:
// {0x00, 0x04, 0x08, 0x0C}, {0x01, 0x05, 0x09, 0x0D}
// {0x02, 0x06, 0x0A, 0x0E}, {0x03, 0x07, 0x0B, 0x0F},
shuffle[0] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
stage[kNumStreamsLog2][1], 0b01000100);
shuffle[1] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
stage[kNumStreamsLog2][3], 0b01000100);
shuffle[2] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][0],
stage[kNumStreamsLog2][1], 0b11101110);
shuffle[3] = _mm512_shuffle_i32x4(stage[kNumStreamsLog2][2],
stage[kNumStreamsLog2][3], 0b11101110);
final_result[0] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b10001000);
final_result[1] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b11011101);
final_result[2] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b10001000);
final_result[3] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b11011101);
}
for (size_t j = 0; j < kNumStreams; ++j) {
_mm512_storeu_si512(reinterpret_cast<__m512i*>(
&output_data[(i * kNumStreams + j) * sizeof(__m512i)]),
final_result[j]);
}
}
}
template <typename T>
void ByteStreamSplitEncodeAvx512(const uint8_t* raw_values, const size_t num_values,
uint8_t* output_buffer_raw) {
constexpr size_t kNumStreams = sizeof(T);
static_assert(kNumStreams == 4U || kNumStreams == 8U, "Invalid number of streams.");
const size_t size = num_values * sizeof(T);
constexpr size_t kBlockSize = sizeof(__m512i) * kNumStreams;
if (size < kBlockSize) // Back to AVX2 for small size
return ByteStreamSplitEncodeAvx2<T>(raw_values, num_values, output_buffer_raw);
const size_t num_blocks = size / kBlockSize;
const __m512i* raw_values_simd = reinterpret_cast<const __m512i*>(raw_values);
__m512i* output_buffer_streams[kNumStreams];
for (size_t i = 0; i < kNumStreams; ++i) {
output_buffer_streams[i] =
reinterpret_cast<__m512i*>(&output_buffer_raw[num_values * i]);
}
// First handle suffix.
const size_t num_processed_elements = (num_blocks * kBlockSize) / sizeof(T);
for (size_t i = num_processed_elements; i < num_values; ++i) {
for (size_t j = 0U; j < kNumStreams; ++j) {
const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
output_buffer_raw[j * num_values + i] = byte_in_value;
}
}
constexpr size_t KNumUnpack = (kNumStreams == 8U) ? 2U : 3U;
__m512i final_result[kNumStreams];
__m512i unpack[KNumUnpack + 1][kNumStreams];
__m512i permutex[kNumStreams];
__m512i permutex_mask;
if (kNumStreams == 8U) {
// Use _mm512_set_epi32; _mm512_set_epi16 is unavailable with some old gcc versions.
permutex_mask = _mm512_set_epi32(0x001F0017, 0x000F0007, 0x001E0016, 0x000E0006,
0x001D0015, 0x000D0005, 0x001C0014, 0x000C0004,
0x001B0013, 0x000B0003, 0x001A0012, 0x000A0002,
0x00190011, 0x00090001, 0x00180010, 0x00080000);
} else {
permutex_mask = _mm512_set_epi32(0x0F, 0x0B, 0x07, 0x03, 0x0E, 0x0A, 0x06, 0x02, 0x0D,
0x09, 0x05, 0x01, 0x0C, 0x08, 0x04, 0x00);
}
for (size_t block_index = 0; block_index < num_blocks; ++block_index) {
for (size_t i = 0; i < kNumStreams; ++i) {
unpack[0][i] = _mm512_loadu_si512(&raw_values_simd[block_index * kNumStreams + i]);
}
for (size_t unpack_lvl = 0; unpack_lvl < KNumUnpack; ++unpack_lvl) {
for (size_t i = 0; i < kNumStreams / 2U; ++i) {
unpack[unpack_lvl + 1][i * 2] = _mm512_unpacklo_epi8(
unpack[unpack_lvl][i * 2], unpack[unpack_lvl][i * 2 + 1]);
unpack[unpack_lvl + 1][i * 2 + 1] = _mm512_unpackhi_epi8(
unpack[unpack_lvl][i * 2], unpack[unpack_lvl][i * 2 + 1]);
}
}
if (kNumStreams == 8U) {
// path for double
// 1. unpack to epi16 block
// 2. permutexvar_epi16 to 128i block
// 3. shuffle 128i to final 512i target, index:
// {0x00, 0x04, 0x08, 0x0C}, {0x10, 0x14, 0x18, 0x1C},
// {0x01, 0x05, 0x09, 0x0D}, {0x11, 0x15, 0x19, 0x1D},
// {0x02, 0x06, 0x0A, 0x0E}, {0x12, 0x16, 0x1A, 0x1E},
// {0x03, 0x07, 0x0B, 0x0F}, {0x13, 0x17, 0x1B, 0x1F},
for (size_t i = 0; i < kNumStreams; ++i)
permutex[i] = _mm512_permutexvar_epi16(permutex_mask, unpack[KNumUnpack][i]);
__m512i shuffle[kNumStreams];
shuffle[0] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b01000100);
shuffle[1] = _mm512_shuffle_i32x4(permutex[4], permutex[6], 0b01000100);
shuffle[2] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b11101110);
shuffle[3] = _mm512_shuffle_i32x4(permutex[4], permutex[6], 0b11101110);
shuffle[4] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b01000100);
shuffle[5] = _mm512_shuffle_i32x4(permutex[5], permutex[7], 0b01000100);
shuffle[6] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b11101110);
shuffle[7] = _mm512_shuffle_i32x4(permutex[5], permutex[7], 0b11101110);
final_result[0] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b10001000);
final_result[1] = _mm512_shuffle_i32x4(shuffle[0], shuffle[1], 0b11011101);
final_result[2] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b10001000);
final_result[3] = _mm512_shuffle_i32x4(shuffle[2], shuffle[3], 0b11011101);
final_result[4] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b10001000);
final_result[5] = _mm512_shuffle_i32x4(shuffle[4], shuffle[5], 0b11011101);
final_result[6] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b10001000);
final_result[7] = _mm512_shuffle_i32x4(shuffle[6], shuffle[7], 0b11011101);
} else {
// Path for float.
// 1. Processed hierarchically to 32i blocks using the unpack intrinsics.
// 2. Permute to 128i blocks using _mm512_permutexvar_epi32.
// 3. Pack the final 512i block with _mm512_shuffle_i32x4.
for (size_t i = 0; i < kNumStreams; ++i)
permutex[i] = _mm512_permutexvar_epi32(permutex_mask, unpack[KNumUnpack][i]);
final_result[0] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b01000100);
final_result[1] = _mm512_shuffle_i32x4(permutex[0], permutex[2], 0b11101110);
final_result[2] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b01000100);
final_result[3] = _mm512_shuffle_i32x4(permutex[1], permutex[3], 0b11101110);
}
for (size_t i = 0; i < kNumStreams; ++i) {
_mm512_storeu_si512(&output_buffer_streams[i][block_index], final_result[i]);
}
}
}
#endif // ARROW_HAVE_AVX512
#if defined(ARROW_HAVE_SIMD_SPLIT)
template <typename T>
void inline ByteStreamSplitDecodeSimd(const uint8_t* data, int64_t num_values,
int64_t stride, T* out) {
#if defined(ARROW_HAVE_AVX512)
return ByteStreamSplitDecodeAvx512(data, num_values, stride, out);
#elif defined(ARROW_HAVE_AVX2)
return ByteStreamSplitDecodeAvx2(data, num_values, stride, out);
#elif defined(ARROW_HAVE_SSE4_2)
return ByteStreamSplitDecodeSse2(data, num_values, stride, out);
#else
#error "ByteStreamSplitDecodeSimd not implemented"
#endif
}
template <typename T>
void inline ByteStreamSplitEncodeSimd(const uint8_t* raw_values, const size_t num_values,
uint8_t* output_buffer_raw) {
#if defined(ARROW_HAVE_AVX512)
return ByteStreamSplitEncodeAvx512<T>(raw_values, num_values, output_buffer_raw);
#elif defined(ARROW_HAVE_AVX2)
return ByteStreamSplitEncodeAvx2<T>(raw_values, num_values, output_buffer_raw);
#elif defined(ARROW_HAVE_SSE4_2)
return ByteStreamSplitEncodeSse2<T>(raw_values, num_values, output_buffer_raw);
#else
#error "ByteStreamSplitEncodeSimd not implemented"
#endif
}
#endif
template <typename T>
void ByteStreamSplitEncodeScalar(const uint8_t* raw_values, const size_t num_values,
uint8_t* output_buffer_raw) {
constexpr size_t kNumStreams = sizeof(T);
for (size_t i = 0U; i < num_values; ++i) {
for (size_t j = 0U; j < kNumStreams; ++j) {
const uint8_t byte_in_value = raw_values[i * kNumStreams + j];
output_buffer_raw[j * num_values + i] = byte_in_value;
}
}
}
template <typename T>
void ByteStreamSplitDecodeScalar(const uint8_t* data, int64_t num_values, int64_t stride,
T* out) {
constexpr size_t kNumStreams = sizeof(T);
auto output_buffer_raw = reinterpret_cast<uint8_t*>(out);
for (int64_t i = 0; i < num_values; ++i) {
for (size_t b = 0; b < kNumStreams; ++b) {
const size_t byte_index = b * stride + i;
output_buffer_raw[i * kNumStreams + b] = data[byte_index];
}
}
}
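// For illustration, a hedged sketch of the transform itself (no additional API):
// with T = float (kNumStreams = 4) and three values whose bytes are
//   v0 = A0 A1 A2 A3, v1 = B0 B1 B2 B3, v2 = C0 C1 C2 C3,
// the encoder writes byte j of every value into stream j:
//   stream 0: A0 B0 C0 | stream 1: A1 B1 C1 | stream 2: A2 B2 C2 | stream 3: A3 B3 C3
// and the decoder gathers byte b of value i from data[b * stride + i] to
// rebuild each value, as in the scalar loops above.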
template <typename T>
void inline ByteStreamSplitEncode(const uint8_t* raw_values, const size_t num_values,
uint8_t* output_buffer_raw) {
#if defined(ARROW_HAVE_SIMD_SPLIT)
return ByteStreamSplitEncodeSimd<T>(raw_values, num_values, output_buffer_raw);
#else
return ByteStreamSplitEncodeScalar<T>(raw_values, num_values, output_buffer_raw);
#endif
}
template <typename T>
void inline ByteStreamSplitDecode(const uint8_t* data, int64_t num_values, int64_t stride,
T* out) {
#if defined(ARROW_HAVE_SIMD_SPLIT)
return ByteStreamSplitDecodeSimd(data, num_values, stride, out);
#else
return ByteStreamSplitDecodeScalar(data, num_values, stride, out);
#endif
}
} // namespace internal
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,29 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <string_view>
namespace arrow {
namespace util {
using bytes_view = std::basic_string_view<uint8_t>;
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,118 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <functional>
#include <memory>
#include <string>
#include <vector>
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
class StopToken;
struct StopSourceImpl;
/// EXPERIMENTAL
class ARROW_EXPORT StopSource {
public:
StopSource();
~StopSource();
// Consumer API (the side that stops)
void RequestStop();
void RequestStop(Status error);
// Async-signal-safe. TODO Deprecate this?
void RequestStopFromSignal(int signum);
StopToken token();
// For internal use only
void Reset();
protected:
std::shared_ptr<StopSourceImpl> impl_;
};
/// EXPERIMENTAL
class ARROW_EXPORT StopToken {
public:
// Public for Cython
StopToken() {}
explicit StopToken(std::shared_ptr<StopSourceImpl> impl) : impl_(std::move(impl)) {}
// A trivial token that never propagates any stop request
static StopToken Unstoppable() { return StopToken(); }
/// \brief Check if the stop source has been cancelled.
///
/// Producers should call this method, whenever convenient, to check and
/// see if they should stop producing early (i.e. have been cancelled).
/// Failure to call this method often enough will lead to an unresponsive
/// cancellation.
///
/// This is part of the producer API (the side that gets asked to stop)
/// This method is thread-safe
///
/// \return An OK status if the stop source has not been cancelled or a
/// cancel error if the source has been cancelled.
Status Poll() const;
bool IsStopRequested() const;
protected:
std::shared_ptr<StopSourceImpl> impl_;
};
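/// For illustration, a hedged sketch of the producer-side polling pattern
/// described above (HasMoreWork() and ProduceOne() are placeholders):
///
///   Status ProduceAll(const StopToken& token) {
///     while (HasMoreWork()) {
///       ARROW_RETURN_NOT_OK(token.Poll());  // returns a cancel error once stopped
///       ARROW_RETURN_NOT_OK(ProduceOne());
///     }
///     return Status::OK();
///   }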
/// EXPERIMENTAL: Set a global StopSource that can receive signals
///
/// The only allowed order of calls is the following:
/// - SetSignalStopSource()
/// - any number of pairs of (RegisterCancellingSignalHandler,
/// UnregisterCancellingSignalHandler) calls
/// - ResetSignalStopSource()
///
/// Beware that these settings are process-wide. Typically, only one
/// thread should call these APIs, even in a multithreaded setting.
ARROW_EXPORT
Result<StopSource*> SetSignalStopSource();
/// EXPERIMENTAL: Reset the global signal-receiving StopSource
///
/// This will invalidate the pointer returned by SetSignalStopSource.
ARROW_EXPORT
void ResetSignalStopSource();
/// EXPERIMENTAL: Register signal handler triggering the signal-receiving StopSource
///
/// Note that those handlers are automatically un-registered in a fork()ed process,
/// therefore the child process will need to call RegisterCancellingSignalHandler()
/// if desired.
ARROW_EXPORT
Status RegisterCancellingSignalHandler(const std::vector<int>& signals);
/// EXPERIMENTAL: Unregister signal handler set up by RegisterCancellingSignalHandler
ARROW_EXPORT
void UnregisterCancellingSignalHandler();
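/// For illustration, a hedged sketch of the allowed call order described above
/// (error handling and signal includes elided):
///
///   Result<StopSource*> source = SetSignalStopSource();
///   Status st = RegisterCancellingSignalHandler({SIGINT, SIGTERM});
///   // ... run cancellable work against (*source)->token() ...
///   UnregisterCancellingSignalHandler();
///   ResetSignalStopSource();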
} // namespace arrow

View File

@@ -0,0 +1,61 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <type_traits>
#include <utility>
namespace arrow {
namespace internal {
template <typename OutputType, typename InputType>
inline OutputType checked_cast(InputType&& value) {
static_assert(std::is_class<typename std::remove_pointer<
typename std::remove_reference<InputType>::type>::type>::value,
"checked_cast input type must be a class");
static_assert(std::is_class<typename std::remove_pointer<
typename std::remove_reference<OutputType>::type>::type>::value,
"checked_cast output type must be a class");
#ifdef NDEBUG
return static_cast<OutputType>(value);
#else
return dynamic_cast<OutputType>(value);
#endif
}
template <class T, class U>
std::shared_ptr<T> checked_pointer_cast(std::shared_ptr<U> r) noexcept {
#ifdef NDEBUG
return std::static_pointer_cast<T>(std::move(r));
#else
return std::dynamic_pointer_cast<T>(std::move(r));
#endif
}
template <class T, class U>
std::unique_ptr<T> checked_pointer_cast(std::unique_ptr<U> r) noexcept {
#ifdef NDEBUG
return std::unique_ptr<T>(static_cast<T*>(r.release()));
#else
return std::unique_ptr<T>(dynamic_cast<T*>(r.release()));
#endif
}
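// For illustration, a hedged usage sketch (Array and Int32Array are the usual
// Arrow callers of this helper; they are not declared in this header):
//
//   const auto& ints = checked_cast<const Int32Array&>(*array);  // dynamic_cast in debug
//   auto ints_ptr = checked_pointer_cast<Int32Array>(array);     // shared_ptr overload
//
// In release builds (NDEBUG) both collapse to static casts.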
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,62 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <type_traits>
#include <utility>
#include "arrow/util/macros.h"
namespace arrow {
namespace util {
/// CRTP helper for declaring equality comparison. Defines operator== and operator!=
template <typename T>
class EqualityComparable {
public:
~EqualityComparable() {
static_assert(
std::is_same<decltype(std::declval<const T>().Equals(std::declval<const T>())),
bool>::value,
"EqualityComparable depends on the method T::Equals(const T&) const");
}
template <typename... Extra>
bool Equals(const std::shared_ptr<T>& other, Extra&&... extra) const {
if (other == NULLPTR) {
return false;
}
return cast().Equals(*other, std::forward<Extra>(extra)...);
}
struct PtrsEqual {
bool operator()(const std::shared_ptr<T>& l, const std::shared_ptr<T>& r) const {
return l->Equals(r);
}
};
bool operator==(const T& other) const { return cast().Equals(other); }
bool operator!=(const T& other) const { return !(cast() == other); }
private:
const T& cast() const { return static_cast<const T&>(*this); }
};
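/// For illustration, a hedged sketch of how a type opts in (MyThing and its
/// members are placeholders, not Arrow classes):
///
///   class MyThing : public util::EqualityComparable<MyThing> {
///    public:
///     bool Equals(const MyThing& other) const { return id_ == other.id_; }
///    private:
///     int id_ = 0;
///   };
///
/// operator==, operator!= and the shared_ptr-aware Equals() overload are then
/// provided by the base class.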
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,202 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace util {
constexpr int kUseDefaultCompressionLevel = std::numeric_limits<int>::min();
/// \brief Streaming compressor interface
///
class ARROW_EXPORT Compressor {
public:
virtual ~Compressor() = default;
struct CompressResult {
int64_t bytes_read;
int64_t bytes_written;
};
struct FlushResult {
int64_t bytes_written;
bool should_retry;
};
struct EndResult {
int64_t bytes_written;
bool should_retry;
};
/// \brief Compress some input.
///
/// If bytes_read is 0 on return, then a larger output buffer should be supplied.
virtual Result<CompressResult> Compress(int64_t input_len, const uint8_t* input,
int64_t output_len, uint8_t* output) = 0;
/// \brief Flush part of the compressed output.
///
/// If should_retry is true on return, Flush() should be called again
/// with a larger buffer.
virtual Result<FlushResult> Flush(int64_t output_len, uint8_t* output) = 0;
/// \brief End compressing, doing whatever is necessary to end the stream.
///
/// If should_retry is true on return, End() should be called again
/// with a larger buffer. Otherwise, the Compressor should not be used anymore.
///
/// End() implies Flush().
virtual Result<EndResult> End(int64_t output_len, uint8_t* output) = 0;
// XXX add methods for buffer size heuristics?
};
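/// For illustration, a hedged sketch of driving a Compressor over one input
/// buffer (buffer management simplified; `sink` is a placeholder):
///
///   while (input_len > 0) {
///     ARROW_ASSIGN_OR_RAISE(auto res,
///                           compressor->Compress(input_len, input, out_len, out));
///     if (res.bytes_read == 0) {
///       // output buffer too small: grow `out` / `out_len` and retry
///     }
///     input += res.bytes_read;
///     input_len -= res.bytes_read;
///     sink->Write(out, res.bytes_written);
///   }
///   // then Flush()/End(), retrying with a larger buffer while should_retry is true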
/// \brief Streaming decompressor interface
///
class ARROW_EXPORT Decompressor {
public:
virtual ~Decompressor() = default;
struct DecompressResult {
// XXX is need_more_output necessary? (Brotli?)
int64_t bytes_read;
int64_t bytes_written;
bool need_more_output;
};
/// \brief Decompress some input.
///
/// If need_more_output is true on return, a larger output buffer needs
/// to be supplied.
virtual Result<DecompressResult> Decompress(int64_t input_len, const uint8_t* input,
int64_t output_len, uint8_t* output) = 0;
/// \brief Return whether the compressed stream is finished.
///
/// This is a heuristic. If true is returned, then it is guaranteed
/// that the stream is finished. If false is returned, however, it may
/// simply be that the underlying library isn't able to provide the information.
virtual bool IsFinished() = 0;
/// \brief Reinitialize decompressor, making it ready for a new compressed stream.
virtual Status Reset() = 0;
// XXX add methods for buffer size heuristics?
};
/// \brief Compression codec
class ARROW_EXPORT Codec {
public:
virtual ~Codec() = default;
/// \brief Return special value to indicate that a codec implementation
/// should use its default compression level
static int UseDefaultCompressionLevel();
/// \brief Return a string name for compression type
static const std::string& GetCodecAsString(Compression::type t);
/// \brief Return compression type for name (all lower case)
static Result<Compression::type> GetCompressionType(const std::string& name);
/// \brief Create a codec for the given compression algorithm
static Result<std::unique_ptr<Codec>> Create(
Compression::type codec, int compression_level = kUseDefaultCompressionLevel);
/// \brief Return true if support for indicated codec has been enabled
static bool IsAvailable(Compression::type codec);
/// \brief Return true if indicated codec supports setting a compression level
static bool SupportsCompressionLevel(Compression::type codec);
/// \brief Return the smallest supported compression level for the codec
/// Note: This function creates a temporary Codec instance
static Result<int> MinimumCompressionLevel(Compression::type codec);
/// \brief Return the largest supported compression level for the codec
/// Note: This function creates a temporary Codec instance
static Result<int> MaximumCompressionLevel(Compression::type codec);
/// \brief Return the default compression level
/// Note: This function creates a temporary Codec instance
static Result<int> DefaultCompressionLevel(Compression::type codec);
/// \brief Return the smallest supported compression level
virtual int minimum_compression_level() const = 0;
/// \brief Return the largest supported compression level
virtual int maximum_compression_level() const = 0;
/// \brief Return the default compression level
virtual int default_compression_level() const = 0;
/// \brief One-shot decompression function
///
/// output_buffer_len must be correct and therefore must be obtained in advance.
/// The actual decompressed length is returned.
///
/// \note One-shot decompression is not always compatible with streaming
/// compression. Depending on the codec (e.g. LZ4), different formats may
/// be used.
virtual Result<int64_t> Decompress(int64_t input_len, const uint8_t* input,
int64_t output_buffer_len,
uint8_t* output_buffer) = 0;
/// \brief One-shot compression function
///
/// output_buffer_len must first have been computed using MaxCompressedLen().
/// The actual compressed length is returned.
///
/// \note One-shot compression is not always compatible with streaming
/// decompression. Depending on the codec (e.g. LZ4), different formats may
/// be used.
virtual Result<int64_t> Compress(int64_t input_len, const uint8_t* input,
int64_t output_buffer_len, uint8_t* output_buffer) = 0;
virtual int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) = 0;
/// \brief Create a streaming compressor instance
virtual Result<std::shared_ptr<Compressor>> MakeCompressor() = 0;
/// \brief Create a streaming compressor instance
virtual Result<std::shared_ptr<Decompressor>> MakeDecompressor() = 0;
/// \brief This Codec's compression type
virtual Compression::type compression_type() const = 0;
/// \brief The name of this Codec's compression type
const std::string& name() const { return GetCodecAsString(compression_type()); }
/// \brief This Codec's compression level, if applicable
virtual int compression_level() const { return UseDefaultCompressionLevel(); }
private:
/// \brief Initializes the codec's resources.
virtual Status Init();
};
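/// For illustration, a hedged sketch of one-shot usage (allocation and error
/// handling elided; ZSTD is just an example codec):
///
///   ARROW_ASSIGN_OR_RAISE(auto codec, Codec::Create(Compression::ZSTD));
///   int64_t max_len = codec->MaxCompressedLen(input_len, input);
///   // allocate `compressed` with max_len bytes, then:
///   ARROW_ASSIGN_OR_RAISE(int64_t actual_len,
///                         codec->Compress(input_len, input, max_len, compressed));
///   // Decompress() requires output_buffer_len to be known in advance (see above).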
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,68 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <unordered_map>
#include <utility>
#include "arrow/util/mutex.h"
namespace arrow {
namespace util {
template <typename K, typename V>
class ConcurrentMap {
public:
void Insert(const K& key, const V& value) {
auto lock = mutex_.Lock();
map_.insert({key, value});
}
template <typename ValueFunc>
V GetOrInsert(const K& key, ValueFunc&& compute_value_func) {
auto lock = mutex_.Lock();
auto it = map_.find(key);
if (it == map_.end()) {
auto pair = map_.emplace(key, compute_value_func());
it = pair.first;
}
return it->second;
}
void Erase(const K& key) {
auto lock = mutex_.Lock();
map_.erase(key);
}
void Clear() {
auto lock = mutex_.Lock();
map_.clear();
}
size_t size() const {
auto lock = mutex_.Lock();
return map_.size();
}
private:
std::unordered_map<K, V> map_;
mutable arrow::util::Mutex mutex_;
};
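// For illustration, a hedged usage sketch (Reader and MakeReader() are
// placeholders, not part of this header):
//
//   ConcurrentMap<std::string, std::shared_ptr<Reader>> cache;
//   auto reader = cache.GetOrInsert("path/to/file",
//                                   [&] { return MakeReader("path/to/file"); });
//
// Note that the value factory runs while the map's mutex is held, so it
// should stay cheap.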
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,61 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#define ARROW_VERSION_MAJOR 11
#define ARROW_VERSION_MINOR 0
#define ARROW_VERSION_PATCH 0
#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH
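// For reference, with 11.0.0 this expands to ((11 * 1000) + 0) * 1000 + 0 = 11000000.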
#define ARROW_VERSION_STRING "11.0.0"
#define ARROW_SO_VERSION "1100"
#define ARROW_FULL_SO_VERSION "1100.0.0"
#define ARROW_CXX_COMPILER_ID "AppleClang"
#define ARROW_CXX_COMPILER_VERSION "14.0.0.14000029"
#define ARROW_CXX_COMPILER_FLAGS " -Qunused-arguments -fcolor-diagnostics"
#define ARROW_BUILD_TYPE "RELEASE"
#define ARROW_GIT_ID "f10f5cfd1376fb0e602334588b3f3624d41dee7d"
#define ARROW_GIT_DESCRIPTION ""
#define ARROW_PACKAGE_KIND "python-wheel-macos"
#define ARROW_COMPUTE
#define ARROW_CSV
/* #undef ARROW_CUDA */
#define ARROW_DATASET
#define ARROW_FILESYSTEM
#define ARROW_FLIGHT
/* #undef ARROW_FLIGHT_SQL */
#define ARROW_IPC
#define ARROW_JEMALLOC
#define ARROW_JEMALLOC_VENDORED
#define ARROW_JSON
#define ARROW_ORC
#define ARROW_PARQUET
#define ARROW_SUBSTRAIT
#define ARROW_GCS
#define ARROW_S3
#define ARROW_USE_NATIVE_INT128
/* #undef ARROW_WITH_MUSL */
/* #undef ARROW_WITH_OPENTELEMETRY */
/* #undef ARROW_WITH_UCX */
#define GRPCPP_PP_INCLUDE

View File

@@ -0,0 +1,411 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "arrow/array.h"
#include "arrow/chunked_array.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/visit_type_inline.h"
namespace arrow {
namespace internal {
template <typename BaseConverter, template <typename...> class ConverterTrait>
static Result<std::unique_ptr<BaseConverter>> MakeConverter(
std::shared_ptr<DataType> type, typename BaseConverter::OptionsType options,
MemoryPool* pool);
template <typename Input, typename Options>
class Converter {
public:
using Self = Converter<Input, Options>;
using InputType = Input;
using OptionsType = Options;
virtual ~Converter() = default;
Status Construct(std::shared_ptr<DataType> type, OptionsType options,
MemoryPool* pool) {
type_ = std::move(type);
options_ = std::move(options);
return Init(pool);
}
virtual Status Append(InputType value) { return Status::NotImplemented("Append"); }
virtual Status Extend(InputType values, int64_t size, int64_t offset = 0) {
return Status::NotImplemented("Extend");
}
virtual Status ExtendMasked(InputType values, InputType mask, int64_t size,
int64_t offset = 0) {
return Status::NotImplemented("ExtendMasked");
}
const std::shared_ptr<ArrayBuilder>& builder() const { return builder_; }
const std::shared_ptr<DataType>& type() const { return type_; }
OptionsType options() const { return options_; }
bool may_overflow() const { return may_overflow_; }
bool rewind_on_overflow() const { return rewind_on_overflow_; }
virtual Status Reserve(int64_t additional_capacity) {
return builder_->Reserve(additional_capacity);
}
Status AppendNull() { return builder_->AppendNull(); }
virtual Result<std::shared_ptr<Array>> ToArray() { return builder_->Finish(); }
virtual Result<std::shared_ptr<Array>> ToArray(int64_t length) {
ARROW_ASSIGN_OR_RAISE(auto arr, this->ToArray());
return arr->Slice(0, length);
}
virtual Result<std::shared_ptr<ChunkedArray>> ToChunkedArray() {
ARROW_ASSIGN_OR_RAISE(auto array, ToArray());
std::vector<std::shared_ptr<Array>> chunks = {std::move(array)};
return std::make_shared<ChunkedArray>(chunks);
}
protected:
virtual Status Init(MemoryPool* pool) { return Status::OK(); }
std::shared_ptr<DataType> type_;
std::shared_ptr<ArrayBuilder> builder_;
OptionsType options_;
bool may_overflow_ = false;
bool rewind_on_overflow_ = false;
};
template <typename ArrowType, typename BaseConverter>
class PrimitiveConverter : public BaseConverter {
public:
using BuilderType = typename TypeTraits<ArrowType>::BuilderType;
protected:
Status Init(MemoryPool* pool) override {
this->builder_ = std::make_shared<BuilderType>(this->type_, pool);
// Narrow variable-sized binary types may overflow
this->may_overflow_ = is_binary_like(this->type_->id());
primitive_type_ = checked_cast<const ArrowType*>(this->type_.get());
primitive_builder_ = checked_cast<BuilderType*>(this->builder_.get());
return Status::OK();
}
const ArrowType* primitive_type_;
BuilderType* primitive_builder_;
};
template <typename ArrowType, typename BaseConverter,
template <typename...> class ConverterTrait>
class ListConverter : public BaseConverter {
public:
using BuilderType = typename TypeTraits<ArrowType>::BuilderType;
using ConverterType = typename ConverterTrait<ArrowType>::type;
protected:
Status Init(MemoryPool* pool) override {
list_type_ = checked_cast<const ArrowType*>(this->type_.get());
ARROW_ASSIGN_OR_RAISE(value_converter_,
(MakeConverter<BaseConverter, ConverterTrait>(
list_type_->value_type(), this->options_, pool)));
this->builder_ =
std::make_shared<BuilderType>(pool, value_converter_->builder(), this->type_);
list_builder_ = checked_cast<BuilderType*>(this->builder_.get());
// Narrow list types may overflow
this->may_overflow_ = this->rewind_on_overflow_ =
sizeof(typename ArrowType::offset_type) < sizeof(int64_t);
return Status::OK();
}
const ArrowType* list_type_;
BuilderType* list_builder_;
std::unique_ptr<BaseConverter> value_converter_;
};
template <typename BaseConverter, template <typename...> class ConverterTrait>
class StructConverter : public BaseConverter {
public:
using ConverterType = typename ConverterTrait<StructType>::type;
Status Reserve(int64_t additional_capacity) override {
ARROW_RETURN_NOT_OK(this->builder_->Reserve(additional_capacity));
for (const auto& child : children_) {
ARROW_RETURN_NOT_OK(child->Reserve(additional_capacity));
}
return Status::OK();
}
protected:
Status Init(MemoryPool* pool) override {
std::unique_ptr<BaseConverter> child_converter;
std::vector<std::shared_ptr<ArrayBuilder>> child_builders;
struct_type_ = checked_cast<const StructType*>(this->type_.get());
for (const auto& field : struct_type_->fields()) {
ARROW_ASSIGN_OR_RAISE(child_converter,
(MakeConverter<BaseConverter, ConverterTrait>(
field->type(), this->options_, pool)));
this->may_overflow_ |= child_converter->may_overflow();
this->rewind_on_overflow_ = this->may_overflow_;
child_builders.push_back(child_converter->builder());
children_.push_back(std::move(child_converter));
}
this->builder_ =
std::make_shared<StructBuilder>(this->type_, pool, std::move(child_builders));
struct_builder_ = checked_cast<StructBuilder*>(this->builder_.get());
return Status::OK();
}
const StructType* struct_type_;
StructBuilder* struct_builder_;
std::vector<std::unique_ptr<BaseConverter>> children_;
};
template <typename ValueType, typename BaseConverter>
class DictionaryConverter : public BaseConverter {
public:
using BuilderType = DictionaryBuilder<ValueType>;
protected:
Status Init(MemoryPool* pool) override {
std::unique_ptr<ArrayBuilder> builder;
ARROW_RETURN_NOT_OK(MakeDictionaryBuilder(pool, this->type_, NULLPTR, &builder));
this->builder_ = std::move(builder);
this->may_overflow_ = false;
dict_type_ = checked_cast<const DictionaryType*>(this->type_.get());
value_type_ = checked_cast<const ValueType*>(dict_type_->value_type().get());
value_builder_ = checked_cast<BuilderType*>(this->builder_.get());
return Status::OK();
}
const DictionaryType* dict_type_;
const ValueType* value_type_;
BuilderType* value_builder_;
};
template <typename BaseConverter, template <typename...> class ConverterTrait>
struct MakeConverterImpl {
template <typename T, typename ConverterType = typename ConverterTrait<T>::type>
Status Visit(const T&) {
out.reset(new ConverterType());
return out->Construct(std::move(type), std::move(options), pool);
}
Status Visit(const DictionaryType& t) {
switch (t.value_type()->id()) {
#define DICTIONARY_CASE(TYPE) \
case TYPE::type_id: \
out = std::make_unique< \
typename ConverterTrait<DictionaryType>::template dictionary_type<TYPE>>(); \
break;
DICTIONARY_CASE(BooleanType);
DICTIONARY_CASE(Int8Type);
DICTIONARY_CASE(Int16Type);
DICTIONARY_CASE(Int32Type);
DICTIONARY_CASE(Int64Type);
DICTIONARY_CASE(UInt8Type);
DICTIONARY_CASE(UInt16Type);
DICTIONARY_CASE(UInt32Type);
DICTIONARY_CASE(UInt64Type);
DICTIONARY_CASE(FloatType);
DICTIONARY_CASE(DoubleType);
DICTIONARY_CASE(BinaryType);
DICTIONARY_CASE(StringType);
DICTIONARY_CASE(FixedSizeBinaryType);
#undef DICTIONARY_CASE
default:
return Status::NotImplemented("DictionaryArray converter for type ", t.ToString(),
" not implemented");
}
return out->Construct(std::move(type), std::move(options), pool);
}
Status Visit(const DataType& t) { return Status::NotImplemented(t.name()); }
std::shared_ptr<DataType> type;
typename BaseConverter::OptionsType options;
MemoryPool* pool;
std::unique_ptr<BaseConverter> out;
};
template <typename BaseConverter, template <typename...> class ConverterTrait>
static Result<std::unique_ptr<BaseConverter>> MakeConverter(
std::shared_ptr<DataType> type, typename BaseConverter::OptionsType options,
MemoryPool* pool) {
MakeConverterImpl<BaseConverter, ConverterTrait> visitor{
std::move(type), std::move(options), pool, NULLPTR};
ARROW_RETURN_NOT_OK(VisitTypeInline(*visitor.type, &visitor));
return std::move(visitor.out);
}
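// Usage sketch (illustrative; the converter and trait names below are hypothetical):
// a client of this header derives a converter hierarchy from a BaseConverter and
// provides a trait template mapping Arrow types to concrete converter classes, then
// asks MakeConverter to assemble the (possibly nested) converter tree:
//
//   template <typename T, typename Enable = void>
//   struct MyConverterTrait;  // e.g. maps Int64Type -> MyPrimitiveConverter<Int64Type>
//
//   ARROW_ASSIGN_OR_RAISE(auto converter,
//                         (MakeConverter<MyBaseConverter, MyConverterTrait>(
//                             int64(), MyOptions{}, default_memory_pool())));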
template <typename Converter>
class Chunker {
public:
using InputType = typename Converter::InputType;
explicit Chunker(std::unique_ptr<Converter> converter)
: converter_(std::move(converter)) {}
Status Reserve(int64_t additional_capacity) {
ARROW_RETURN_NOT_OK(converter_->Reserve(additional_capacity));
reserved_ += additional_capacity;
return Status::OK();
}
Status AppendNull() {
auto status = converter_->AppendNull();
if (ARROW_PREDICT_FALSE(status.IsCapacityError())) {
if (converter_->builder()->length() == 0) {
// Builder length == 0 means the individual element is too large to append.
// In this case, no need to try again.
return status;
}
ARROW_RETURN_NOT_OK(FinishChunk());
return converter_->AppendNull();
}
++length_;
return status;
}
Status Append(InputType value) {
auto status = converter_->Append(value);
if (ARROW_PREDICT_FALSE(status.IsCapacityError())) {
if (converter_->builder()->length() == 0) {
return status;
}
ARROW_RETURN_NOT_OK(FinishChunk());
return Append(value);
}
++length_;
return status;
}
Status Extend(InputType values, int64_t size, int64_t offset = 0) {
while (offset < size) {
auto length_before = converter_->builder()->length();
auto status = converter_->Extend(values, size, offset);
auto length_after = converter_->builder()->length();
auto num_converted = length_after - length_before;
offset += num_converted;
length_ += num_converted;
if (status.IsCapacityError()) {
if (converter_->builder()->length() == 0) {
// Builder length == 0 means the individual element is too large to append.
// In this case, no need to try again.
return status;
} else if (converter_->rewind_on_overflow()) {
        // The list-like and binary-like conversion paths may both raise a capacity
        // error, but they need to be handled differently: the binary-like converters
        // check the capacity *before* appending/extending, whereas the list-like
        // converters only check *after*. Depending on these semantics we may
        // therefore need to rewind (slice) the output chunk by one element.
length_ -= 1;
offset -= 1;
}
ARROW_RETURN_NOT_OK(FinishChunk());
} else if (!status.ok()) {
return status;
}
}
return Status::OK();
}
Status ExtendMasked(InputType values, InputType mask, int64_t size,
int64_t offset = 0) {
while (offset < size) {
auto length_before = converter_->builder()->length();
auto status = converter_->ExtendMasked(values, mask, size, offset);
auto length_after = converter_->builder()->length();
auto num_converted = length_after - length_before;
offset += num_converted;
length_ += num_converted;
if (status.IsCapacityError()) {
if (converter_->builder()->length() == 0) {
// Builder length == 0 means the individual element is too large to append.
// In this case, no need to try again.
return status;
} else if (converter_->rewind_on_overflow()) {
        // The list-like and binary-like conversion paths may both raise a capacity
        // error, but they need to be handled differently: the binary-like converters
        // check the capacity *before* appending/extending, whereas the list-like
        // converters only check *after*. Depending on these semantics we may
        // therefore need to rewind (slice) the output chunk by one element.
length_ -= 1;
offset -= 1;
}
ARROW_RETURN_NOT_OK(FinishChunk());
} else if (!status.ok()) {
return status;
}
}
return Status::OK();
}
Status FinishChunk() {
ARROW_ASSIGN_OR_RAISE(auto chunk, converter_->ToArray(length_));
chunks_.push_back(chunk);
// Reserve space for the remaining items.
// Besides being an optimization, it is also required if the converter's
// implementation relies on unsafe builder methods in converter->Append().
auto remaining = reserved_ - length_;
Reset();
return Reserve(remaining);
}
Result<std::shared_ptr<ChunkedArray>> ToChunkedArray() {
ARROW_RETURN_NOT_OK(FinishChunk());
return std::make_shared<ChunkedArray>(chunks_);
}
protected:
void Reset() {
converter_->builder()->Reset();
length_ = 0;
reserved_ = 0;
}
int64_t length_ = 0;
int64_t reserved_ = 0;
std::unique_ptr<Converter> converter_;
std::vector<std::shared_ptr<Array>> chunks_;
};
template <typename T>
static Result<std::unique_ptr<Chunker<T>>> MakeChunker(std::unique_ptr<T> converter) {
return std::make_unique<Chunker<T>>(std::move(converter));
}
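// Usage sketch (illustrative; assumes a `converter` built via MakeConverter above and
// an input range `values` of whatever InputType that converter accepts): the Chunker
// wraps the converter and transparently finishes the current chunk and retries
// whenever the underlying builder reports a capacity overflow.
//
//   ARROW_ASSIGN_OR_RAISE(auto chunker, MakeChunker(std::move(converter)));
//   ARROW_RETURN_NOT_OK(chunker->Reserve(num_values));
//   for (const auto& value : values) {
//     ARROW_RETURN_NOT_OK(chunker->Append(value));
//   }
//   ARROW_ASSIGN_OR_RAISE(auto chunked_array, chunker->ToChunkedArray());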
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,60 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef ARROW_COUNTING_SEMAPHORE_H
#define ARROW_COUNTING_SEMAPHORE_H
#include <memory>
#include "arrow/status.h"
namespace arrow {
namespace util {
/// \brief Simple mutex-based counting semaphore with timeout
class ARROW_EXPORT CountingSemaphore {
public:
/// \brief Create an instance with initial_avail starting permits
///
/// \param[in] initial_avail The semaphore will start with this many permits available
/// \param[in] timeout_seconds A timeout to be applied to all operations. Operations
/// will return Status::Invalid if this timeout elapses
explicit CountingSemaphore(uint32_t initial_avail = 0, double timeout_seconds = 10);
~CountingSemaphore();
/// \brief Block until num_permits permits are available
Status Acquire(uint32_t num_permits);
/// \brief Make num_permits permits available
Status Release(uint32_t num_permits);
/// \brief Wait until num_waiters are waiting on permits
///
/// This method is non-standard but useful in unit tests to ensure sequencing
Status WaitForWaiters(uint32_t num_waiters);
/// \brief Immediately time out any waiters
///
/// This method will return Status::OK only if there were no waiters to time out.
  /// Once closed, any operation on this instance will return an invalid status.
Status Close();
private:
class Impl;
std::unique_ptr<Impl> impl_;
};
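// Usage sketch (illustrative): a consumer blocks until a producer releases enough
// permits; both calls return Status, so a timeout surfaces as an error.
//
//   CountingSemaphore semaphore(/*initial_avail=*/0);
//   // Thread A: wait until two permits become available
//   ARROW_RETURN_NOT_OK(semaphore.Acquire(2));
//   // Thread B: make two permits available
//   ARROW_RETURN_NOT_OK(semaphore.Release(2));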
} // namespace util
} // namespace arrow
#endif // ARROW_COUNTING_SEMAPHORE_H

View File

@@ -0,0 +1,114 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// From Apache Impala (incubating) as of 2016-01-29. Pared down to a minimal
// set of functions needed for Apache Arrow / Apache parquet-cpp
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
/// CpuInfo is an interface to query for cpu information at runtime. The caller can
/// ask for the sizes of the caches and what hardware features are supported.
/// On Linux, this information is pulled from a couple of sys files (/proc/cpuinfo and
/// /sys/devices)
class ARROW_EXPORT CpuInfo {
public:
~CpuInfo();
/// x86 features
static constexpr int64_t SSSE3 = (1LL << 0);
static constexpr int64_t SSE4_1 = (1LL << 1);
static constexpr int64_t SSE4_2 = (1LL << 2);
static constexpr int64_t POPCNT = (1LL << 3);
static constexpr int64_t AVX = (1LL << 4);
static constexpr int64_t AVX2 = (1LL << 5);
static constexpr int64_t AVX512F = (1LL << 6);
static constexpr int64_t AVX512CD = (1LL << 7);
static constexpr int64_t AVX512VL = (1LL << 8);
static constexpr int64_t AVX512DQ = (1LL << 9);
static constexpr int64_t AVX512BW = (1LL << 10);
static constexpr int64_t AVX512 = AVX512F | AVX512CD | AVX512VL | AVX512DQ | AVX512BW;
static constexpr int64_t BMI1 = (1LL << 11);
static constexpr int64_t BMI2 = (1LL << 12);
/// Arm features
static constexpr int64_t ASIMD = (1LL << 32);
/// Cache enums for L1 (data), L2 and L3
enum class CacheLevel { L1 = 0, L2, L3, Last = L3 };
/// CPU vendors
enum class Vendor { Unknown, Intel, AMD };
static const CpuInfo* GetInstance();
/// Returns all the flags for this cpu
int64_t hardware_flags() const;
/// Returns the number of cores (including hyper-threaded) on this machine.
int num_cores() const;
/// Returns the vendor of the cpu.
Vendor vendor() const;
/// Returns the model name of the cpu (e.g. Intel i7-2600)
const std::string& model_name() const;
/// Returns the size of the cache in KB at this cache level
int64_t CacheSize(CacheLevel level) const;
/// \brief Returns whether or not the given feature is enabled.
///
/// IsSupported() is true iff IsDetected() is also true and the feature
/// wasn't disabled by the user (for example by setting the ARROW_USER_SIMD_LEVEL
/// environment variable).
bool IsSupported(int64_t flags) const;
/// Returns whether or not the given feature is available on the CPU.
bool IsDetected(int64_t flags) const;
  /// Determine whether the CPU meets the minimum CPU requirements and, if not,
  /// issue an error and terminate.
void VerifyCpuRequirements() const;
/// Toggle a hardware feature on and off. It is not valid to turn on a feature
/// that the underlying hardware cannot support. This is useful for testing.
void EnableFeature(int64_t flag, bool enable);
bool HasEfficientBmi2() const {
// BMI2 (pext, pdep) is only efficient on Intel X86 processors.
return vendor() == Vendor::Intel && IsSupported(BMI2);
}
private:
CpuInfo();
struct Impl;
std::unique_ptr<Impl> impl_;
};
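// Usage sketch (illustrative): query detected features at runtime to pick a
// specialized code path, and read cache sizes for blocking decisions.
//
//   const CpuInfo* cpu_info = CpuInfo::GetInstance();
//   if (cpu_info->IsSupported(CpuInfo::AVX2)) {
//     // ... dispatch to an AVX2-specialized kernel ...
//   }
//   int64_t l1_cache_size = cpu_info->CacheSize(CpuInfo::CacheLevel::L1);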
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,29 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
ARROW_EXPORT
void DebugTrap();
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,316 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <iosfwd>
#include <limits>
#include <string>
#include <string_view>
#include <utility>
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/basic_decimal.h"
namespace arrow {
/// Represents a signed 128-bit integer in two's complement.
/// Calculations wrap around and overflow is ignored.
/// The max decimal precision that can be safely represented is
/// 38 significant digits.
///
/// For a discussion of the algorithms, look at Knuth's volume 2,
/// Semi-numerical Algorithms section 4.3.1.
///
/// Adapted from the Apache ORC C++ implementation
///
/// The implementation is split into two parts:
///
/// 1. BasicDecimal128
/// - can be safely compiled to IR without references to libstdc++.
/// 2. Decimal128
/// - has additional functionality on top of BasicDecimal128 to deal with
/// strings and streams.
class ARROW_EXPORT Decimal128 : public BasicDecimal128 {
public:
/// \cond FALSE
// (need to avoid a duplicate definition in Sphinx)
using BasicDecimal128::BasicDecimal128;
/// \endcond
/// \brief constructor creates a Decimal128 from a BasicDecimal128.
constexpr Decimal128(const BasicDecimal128& value) noexcept // NOLINT runtime/explicit
: BasicDecimal128(value) {}
/// \brief Parse the number from a base 10 string representation.
explicit Decimal128(const std::string& value);
/// \brief Empty constructor creates a Decimal128 with a value of 0.
// This is required on some older compilers.
constexpr Decimal128() noexcept : BasicDecimal128() {}
/// Divide this number by right and return the result.
///
/// This operation is not destructive.
/// The answer rounds to zero. Signs work like:
/// 21 / 5 -> 4, 1
/// -21 / 5 -> -4, -1
/// 21 / -5 -> -4, 1
/// -21 / -5 -> 4, -1
/// \param[in] divisor the number to divide by
/// \return the pair of the quotient and the remainder
Result<std::pair<Decimal128, Decimal128>> Divide(const Decimal128& divisor) const {
std::pair<Decimal128, Decimal128> result;
auto dstatus = BasicDecimal128::Divide(divisor, &result.first, &result.second);
ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
return std::move(result);
}
/// \brief Convert the Decimal128 value to a base 10 decimal string with the given
/// scale.
std::string ToString(int32_t scale) const;
/// \brief Convert the value to an integer string
std::string ToIntegerString() const;
/// \brief Cast this value to an int64_t.
explicit operator int64_t() const;
/// \brief Convert a decimal string to a Decimal128 value, optionally including
/// precision and scale if they're passed in and not null.
static Status FromString(const std::string_view& s, Decimal128* out, int32_t* precision,
int32_t* scale = NULLPTR);
static Status FromString(const std::string& s, Decimal128* out, int32_t* precision,
int32_t* scale = NULLPTR);
static Status FromString(const char* s, Decimal128* out, int32_t* precision,
int32_t* scale = NULLPTR);
static Result<Decimal128> FromString(const std::string_view& s);
static Result<Decimal128> FromString(const std::string& s);
static Result<Decimal128> FromString(const char* s);
static Result<Decimal128> FromReal(double real, int32_t precision, int32_t scale);
static Result<Decimal128> FromReal(float real, int32_t precision, int32_t scale);
/// \brief Convert from a big-endian byte representation. The length must be
/// between 1 and 16.
/// \return error status if the length is an invalid value
static Result<Decimal128> FromBigEndian(const uint8_t* data, int32_t length);
/// \brief Convert Decimal128 from one scale to another
Result<Decimal128> Rescale(int32_t original_scale, int32_t new_scale) const {
Decimal128 out;
auto dstatus = BasicDecimal128::Rescale(original_scale, new_scale, &out);
ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
return std::move(out);
}
/// \brief Convert to a signed integer
template <typename T, typename = internal::EnableIfIsOneOf<T, int32_t, int64_t>>
Result<T> ToInteger() const {
constexpr auto min_value = std::numeric_limits<T>::min();
constexpr auto max_value = std::numeric_limits<T>::max();
const auto& self = *this;
if (self < min_value || self > max_value) {
return Status::Invalid("Invalid cast from Decimal128 to ", sizeof(T),
" byte integer");
}
return static_cast<T>(low_bits());
}
/// \brief Convert to a signed integer
template <typename T, typename = internal::EnableIfIsOneOf<T, int32_t, int64_t>>
Status ToInteger(T* out) const {
return ToInteger<T>().Value(out);
}
/// \brief Convert to a floating-point number (scaled)
float ToFloat(int32_t scale) const;
/// \brief Convert to a floating-point number (scaled)
double ToDouble(int32_t scale) const;
/// \brief Convert to a floating-point number (scaled)
template <typename T>
T ToReal(int32_t scale) const {
return ToRealConversion<T>::ToReal(*this, scale);
}
ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os,
const Decimal128& decimal);
private:
/// Converts internal error code to Status
Status ToArrowStatus(DecimalStatus dstatus) const;
template <typename T>
struct ToRealConversion {};
};
template <>
struct Decimal128::ToRealConversion<float> {
static float ToReal(const Decimal128& dec, int32_t scale) { return dec.ToFloat(scale); }
};
template <>
struct Decimal128::ToRealConversion<double> {
static double ToReal(const Decimal128& dec, int32_t scale) {
return dec.ToDouble(scale);
}
};
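// Usage sketch (illustrative): parse a decimal string, change its scale, and render
// it back; the values in the comments follow the semantics documented above.
//
//   ARROW_ASSIGN_OR_RAISE(Decimal128 dec, Decimal128::FromString("123.45"));
//   ARROW_ASSIGN_OR_RAISE(Decimal128 rescaled, dec.Rescale(/*original_scale=*/2,
//                                                          /*new_scale=*/4));
//   std::string repr = rescaled.ToString(/*scale=*/4);  // "123.4500"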
/// Represents a signed 256-bit integer in two's complement.
/// The max decimal precision that can be safely represented is
/// 76 significant digits.
///
/// The implementation is split into two parts:
///
/// 1. BasicDecimal256
/// - can be safely compiled to IR without references to libstdc++.
/// 2. Decimal256
/// - (TODO) has additional functionality on top of BasicDecimal256 to deal with
/// strings and streams.
class ARROW_EXPORT Decimal256 : public BasicDecimal256 {
public:
/// \cond FALSE
// (need to avoid a duplicate definition in Sphinx)
using BasicDecimal256::BasicDecimal256;
/// \endcond
/// \brief constructor creates a Decimal256 from a BasicDecimal256.
constexpr Decimal256(const BasicDecimal256& value) noexcept // NOLINT(runtime/explicit)
: BasicDecimal256(value) {}
/// \brief Parse the number from a base 10 string representation.
explicit Decimal256(const std::string& value);
/// \brief Empty constructor creates a Decimal256 with a value of 0.
// This is required on some older compilers.
constexpr Decimal256() noexcept : BasicDecimal256() {}
/// \brief Convert the Decimal256 value to a base 10 decimal string with the given
/// scale.
std::string ToString(int32_t scale) const;
/// \brief Convert the value to an integer string
std::string ToIntegerString() const;
/// \brief Convert a decimal string to a Decimal256 value, optionally including
/// precision and scale if they're passed in and not null.
static Status FromString(const std::string_view& s, Decimal256* out, int32_t* precision,
int32_t* scale = NULLPTR);
static Status FromString(const std::string& s, Decimal256* out, int32_t* precision,
int32_t* scale = NULLPTR);
static Status FromString(const char* s, Decimal256* out, int32_t* precision,
int32_t* scale = NULLPTR);
static Result<Decimal256> FromString(const std::string_view& s);
static Result<Decimal256> FromString(const std::string& s);
static Result<Decimal256> FromString(const char* s);
/// \brief Convert Decimal256 from one scale to another
Result<Decimal256> Rescale(int32_t original_scale, int32_t new_scale) const {
Decimal256 out;
auto dstatus = BasicDecimal256::Rescale(original_scale, new_scale, &out);
ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
return std::move(out);
}
/// Divide this number by right and return the result.
///
/// This operation is not destructive.
/// The answer rounds to zero. Signs work like:
/// 21 / 5 -> 4, 1
/// -21 / 5 -> -4, -1
/// 21 / -5 -> -4, 1
/// -21 / -5 -> 4, -1
/// \param[in] divisor the number to divide by
/// \return the pair of the quotient and the remainder
Result<std::pair<Decimal256, Decimal256>> Divide(const Decimal256& divisor) const {
std::pair<Decimal256, Decimal256> result;
auto dstatus = BasicDecimal256::Divide(divisor, &result.first, &result.second);
ARROW_RETURN_NOT_OK(ToArrowStatus(dstatus));
return std::move(result);
}
/// \brief Convert from a big-endian byte representation. The length must be
/// between 1 and 32.
/// \return error status if the length is an invalid value
static Result<Decimal256> FromBigEndian(const uint8_t* data, int32_t length);
static Result<Decimal256> FromReal(double real, int32_t precision, int32_t scale);
static Result<Decimal256> FromReal(float real, int32_t precision, int32_t scale);
/// \brief Convert to a floating-point number (scaled).
/// May return infinity in case of overflow.
float ToFloat(int32_t scale) const;
/// \brief Convert to a floating-point number (scaled)
double ToDouble(int32_t scale) const;
/// \brief Convert to a floating-point number (scaled)
template <typename T>
T ToReal(int32_t scale) const {
return ToRealConversion<T>::ToReal(*this, scale);
}
ARROW_FRIEND_EXPORT friend std::ostream& operator<<(std::ostream& os,
const Decimal256& decimal);
private:
/// Converts internal error code to Status
Status ToArrowStatus(DecimalStatus dstatus) const;
template <typename T>
struct ToRealConversion {};
};
template <>
struct Decimal256::ToRealConversion<float> {
static float ToReal(const Decimal256& dec, int32_t scale) { return dec.ToFloat(scale); }
};
template <>
struct Decimal256::ToRealConversion<double> {
static double ToReal(const Decimal256& dec, int32_t scale) {
return dec.ToDouble(scale);
}
};
/// For an integer type, return the max number of decimal digits
/// (=minimal decimal precision) it can represent.
inline Result<int32_t> MaxDecimalDigitsForInteger(Type::type type_id) {
switch (type_id) {
case Type::INT8:
case Type::UINT8:
return 3;
case Type::INT16:
case Type::UINT16:
return 5;
case Type::INT32:
case Type::UINT32:
return 10;
case Type::INT64:
return 19;
case Type::UINT64:
return 20;
default:
break;
}
return Status::Invalid("Not an integer type: ", type_id);
}
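// Usage sketch (illustrative): pick a decimal precision wide enough to hold any value
// of a given integer type without loss.
//
//   ARROW_ASSIGN_OR_RAISE(int32_t digits, MaxDecimalDigitsForInteger(Type::INT32));
//   // digits == 10: a decimal with precision >= 10 can represent every int32 value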
} // namespace arrow

View File

@@ -0,0 +1,181 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string_view>
#include "arrow/status.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Buffer;
class ARROW_EXPORT BoundaryFinder {
public:
BoundaryFinder() = default;
virtual ~BoundaryFinder();
/// \brief Find the position of the first delimiter inside block
///
/// `partial` is taken to be the beginning of the block, and `block`
/// its continuation. Also, `partial` doesn't contain a delimiter.
///
/// The returned `out_pos` is relative to `block`'s start and should point
/// to the first character after the first delimiter.
/// `out_pos` will be -1 if no delimiter is found.
virtual Status FindFirst(std::string_view partial, std::string_view block,
int64_t* out_pos) = 0;
/// \brief Find the position of the last delimiter inside block
///
/// The returned `out_pos` is relative to `block`'s start and should point
/// to the first character after the last delimiter.
/// `out_pos` will be -1 if no delimiter is found.
virtual Status FindLast(std::string_view block, int64_t* out_pos) = 0;
/// \brief Find the position of the Nth delimiter inside the block
///
/// `partial` is taken to be the beginning of the block, and `block`
/// its continuation. Also, `partial` doesn't contain a delimiter.
///
/// The returned `out_pos` is relative to `block`'s start and should point
  /// to the first character after the Nth delimiter (or after the last delimiter
  /// found, if fewer than N are present).
/// `out_pos` will be -1 if no delimiter is found.
///
/// The returned `num_found` is the number of delimiters actually found
virtual Status FindNth(std::string_view partial, std::string_view block, int64_t count,
int64_t* out_pos, int64_t* num_found) = 0;
static constexpr int64_t kNoDelimiterFound = -1;
protected:
ARROW_DISALLOW_COPY_AND_ASSIGN(BoundaryFinder);
};
ARROW_EXPORT
std::shared_ptr<BoundaryFinder> MakeNewlineBoundaryFinder();
/// \brief A reusable block-based chunker for delimited data
///
/// The chunker takes a block of delimited data and helps carve a sub-block
/// which begins and ends on delimiters (suitable for consumption by parsers
/// which can only parse whole objects).
class ARROW_EXPORT Chunker {
public:
explicit Chunker(std::shared_ptr<BoundaryFinder> delimiter);
~Chunker();
/// \brief Carve up a chunk in a block of data to contain only whole objects
///
/// Pre-conditions:
/// - `block` is the start of a valid block of delimited data
/// (i.e. starts just after a delimiter)
///
/// Post-conditions:
/// - block == whole + partial
/// - `whole` is a valid block of delimited data
/// (i.e. starts just after a delimiter and ends with a delimiter)
/// - `partial` doesn't contain an entire delimited object
/// (IOW: `partial` is generally small)
///
/// This method will look for the last delimiter in `block` and may
/// therefore be costly.
///
/// \param[in] block data to be chunked
/// \param[out] whole subrange of block containing whole delimited objects
/// \param[out] partial subrange of block starting with a partial delimited object
Status Process(std::shared_ptr<Buffer> block, std::shared_ptr<Buffer>* whole,
std::shared_ptr<Buffer>* partial);
/// \brief Carve the completion of a partial object out of a block
///
/// Pre-conditions:
/// - `partial` is the start of a valid block of delimited data
/// (i.e. starts just after a delimiter)
/// - `block` follows `partial` in file order
///
/// Post-conditions:
/// - block == completion + rest
/// - `partial + completion` is a valid block of delimited data
/// (i.e. starts just after a delimiter and ends with a delimiter)
/// - `completion` doesn't contain an entire delimited object
/// (IOW: `completion` is generally small)
///
/// This method will look for the first delimiter in `block` and should
/// therefore be reasonably cheap.
///
/// \param[in] partial incomplete delimited data
/// \param[in] block delimited data following partial
/// \param[out] completion subrange of block containing the completion of partial
/// \param[out] rest subrange of block containing what completion does not cover
Status ProcessWithPartial(std::shared_ptr<Buffer> partial,
std::shared_ptr<Buffer> block,
std::shared_ptr<Buffer>* completion,
std::shared_ptr<Buffer>* rest);
/// \brief Like ProcessWithPartial, but for the last block of a file
///
/// This method allows for a final delimited object without a trailing delimiter
/// (ProcessWithPartial would return an error in that case).
///
/// Pre-conditions:
/// - `partial` is the start of a valid block of delimited data
/// - `block` follows `partial` in file order and is the last data block
///
/// Post-conditions:
/// - block == completion + rest
/// - `partial + completion` is a valid block of delimited data
/// - `completion` doesn't contain an entire delimited object
/// (IOW: `completion` is generally small)
///
Status ProcessFinal(std::shared_ptr<Buffer> partial, std::shared_ptr<Buffer> block,
std::shared_ptr<Buffer>* completion, std::shared_ptr<Buffer>* rest);
/// \brief Skip count number of rows
/// Pre-conditions:
/// - `partial` is the start of a valid block of delimited data
/// (i.e. starts just after a delimiter)
/// - `block` follows `partial` in file order
///
/// Post-conditions:
/// - `count` is updated to indicate the number of rows that still need to be skipped
/// - If `count` is > 0 then `rest` is an incomplete block that should be a future
/// `partial`
/// - Else `rest` could be one or more valid blocks of delimited data which need to be
/// parsed
///
/// \param[in] partial incomplete delimited data
/// \param[in] block delimited data following partial
/// \param[in] final whether this is the final chunk
/// \param[in,out] count number of rows that need to be skipped
/// \param[out] rest subrange of block containing what was not skipped
Status ProcessSkip(std::shared_ptr<Buffer> partial, std::shared_ptr<Buffer> block,
bool final, int64_t* count, std::shared_ptr<Buffer>* rest);
protected:
ARROW_DISALLOW_COPY_AND_ASSIGN(Chunker);
std::shared_ptr<BoundaryFinder> boundary_finder_;
};
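// Usage sketch (illustrative): carve newline-delimited data into parseable blocks;
// `block` and `next_block` stand for buffers read sequentially from a file.
//
//   Chunker chunker(MakeNewlineBoundaryFinder());
//   std::shared_ptr<Buffer> whole, partial, completion, rest;
//   ARROW_RETURN_NOT_OK(chunker.Process(block, &whole, &partial));
//   // ... parse `whole`, then complete `partial` using the next block ...
//   ARROW_RETURN_NOT_OK(chunker.ProcessWithPartial(partial, next_block,
//                                                  &completion, &rest));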
} // namespace arrow

View File

@@ -0,0 +1,115 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <utility>
#include <vector>
#include "arrow/status.h"
#include "arrow/util/cpu_info.h"
namespace arrow {
namespace internal {
enum class DispatchLevel : int {
// These dispatch levels, corresponding to instruction set features,
// are sorted in increasing order of preference.
NONE = 0,
SSE4_2,
AVX2,
AVX512,
NEON,
MAX
};
/*
A facility for dynamic dispatch according to available DispatchLevel.
Typical use:
static void my_function_default(...);
static void my_function_avx2(...);
struct MyDynamicFunction {
using FunctionType = decltype(&my_function_default);
static std::vector<std::pair<DispatchLevel, FunctionType>> implementations() {
return {
{ DispatchLevel::NONE, my_function_default }
#if defined(ARROW_HAVE_RUNTIME_AVX2)
, { DispatchLevel::AVX2, my_function_avx2 }
#endif
};
}
};
void my_function(...) {
static DynamicDispatch<MyDynamicFunction> dispatch;
return dispatch.func(...);
}
*/
template <typename DynamicFunction>
class DynamicDispatch {
protected:
using FunctionType = typename DynamicFunction::FunctionType;
using Implementation = std::pair<DispatchLevel, FunctionType>;
public:
DynamicDispatch() { Resolve(DynamicFunction::implementations()); }
FunctionType func = {};
protected:
// Use the Implementation with the highest DispatchLevel
void Resolve(const std::vector<Implementation>& implementations) {
Implementation cur{DispatchLevel::NONE, {}};
for (const auto& impl : implementations) {
if (impl.first >= cur.first && IsSupported(impl.first)) {
// Higher (or same) level than current
cur = impl;
}
}
if (!cur.second) {
Status::Invalid("No appropriate implementation found").Abort();
}
func = cur.second;
}
private:
bool IsSupported(DispatchLevel level) const {
static const auto cpu_info = arrow::internal::CpuInfo::GetInstance();
switch (level) {
case DispatchLevel::NONE:
return true;
case DispatchLevel::SSE4_2:
return cpu_info->IsSupported(CpuInfo::SSE4_2);
case DispatchLevel::AVX2:
return cpu_info->IsSupported(CpuInfo::AVX2);
case DispatchLevel::AVX512:
return cpu_info->IsSupported(CpuInfo::AVX512);
default:
return false;
}
}
};
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,32 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/vendored/double-conversion/double-conversion.h" // IWYU pragma: export
namespace arrow {
namespace util {
namespace double_conversion {
using ::double_conversion::DoubleToStringConverter;
using ::double_conversion::StringBuilder;
using ::double_conversion::StringToDoubleConverter;
} // namespace double_conversion
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,245 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#ifdef _WIN32
#define ARROW_LITTLE_ENDIAN 1
#else
#if defined(__APPLE__) || defined(__FreeBSD__)
#include <machine/endian.h> // IWYU pragma: keep
#elif defined(sun) || defined(__sun)
#include <sys/byteorder.h> // IWYU pragma: keep
#else
#include <endian.h> // IWYU pragma: keep
#endif
#
#ifndef __BYTE_ORDER__
#error "__BYTE_ORDER__ not defined"
#endif
#
#ifndef __ORDER_LITTLE_ENDIAN__
#error "__ORDER_LITTLE_ENDIAN__ not defined"
#endif
#
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define ARROW_LITTLE_ENDIAN 1
#else
#define ARROW_LITTLE_ENDIAN 0
#endif
#endif
#if defined(_MSC_VER)
#include <intrin.h> // IWYU pragma: keep
#define ARROW_BYTE_SWAP64 _byteswap_uint64
#define ARROW_BYTE_SWAP32 _byteswap_ulong
#else
#define ARROW_BYTE_SWAP64 __builtin_bswap64
#define ARROW_BYTE_SWAP32 __builtin_bswap32
#endif
#include <algorithm>
#include <array>
#include "arrow/util/type_traits.h"
#include "arrow/util/ubsan.h"
namespace arrow {
namespace bit_util {
//
// Byte-swap 16-bit, 32-bit and 64-bit values
//
// Swap the byte order (i.e. endianness)
static inline int64_t ByteSwap(int64_t value) { return ARROW_BYTE_SWAP64(value); }
static inline uint64_t ByteSwap(uint64_t value) {
return static_cast<uint64_t>(ARROW_BYTE_SWAP64(value));
}
static inline int32_t ByteSwap(int32_t value) { return ARROW_BYTE_SWAP32(value); }
static inline uint32_t ByteSwap(uint32_t value) {
return static_cast<uint32_t>(ARROW_BYTE_SWAP32(value));
}
static inline int16_t ByteSwap(int16_t value) {
constexpr auto m = static_cast<int16_t>(0xff);
return static_cast<int16_t>(((value >> 8) & m) | ((value & m) << 8));
}
static inline uint16_t ByteSwap(uint16_t value) {
return static_cast<uint16_t>(ByteSwap(static_cast<int16_t>(value)));
}
static inline uint8_t ByteSwap(uint8_t value) { return value; }
static inline int8_t ByteSwap(int8_t value) { return value; }
static inline double ByteSwap(double value) {
const uint64_t swapped = ARROW_BYTE_SWAP64(util::SafeCopy<uint64_t>(value));
return util::SafeCopy<double>(swapped);
}
static inline float ByteSwap(float value) {
const uint32_t swapped = ARROW_BYTE_SWAP32(util::SafeCopy<uint32_t>(value));
return util::SafeCopy<float>(swapped);
}
// Write the swapped bytes into dst. Src and dst cannot overlap.
static inline void ByteSwap(void* dst, const void* src, int len) {
switch (len) {
case 1:
*reinterpret_cast<int8_t*>(dst) = *reinterpret_cast<const int8_t*>(src);
return;
case 2:
*reinterpret_cast<int16_t*>(dst) = ByteSwap(*reinterpret_cast<const int16_t*>(src));
return;
case 4:
*reinterpret_cast<int32_t*>(dst) = ByteSwap(*reinterpret_cast<const int32_t*>(src));
return;
case 8:
*reinterpret_cast<int64_t*>(dst) = ByteSwap(*reinterpret_cast<const int64_t*>(src));
return;
default:
break;
}
auto d = reinterpret_cast<uint8_t*>(dst);
auto s = reinterpret_cast<const uint8_t*>(src);
for (int i = 0; i < len; ++i) {
d[i] = s[len - i - 1];
}
}
// Convert to little/big endian format from the machine's native endian format.
#if ARROW_LITTLE_ENDIAN
template <typename T, typename = internal::EnableIfIsOneOf<
T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
uint8_t, int8_t, float, double, bool>>
static inline T ToBigEndian(T value) {
return ByteSwap(value);
}
template <typename T, typename = internal::EnableIfIsOneOf<
T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
uint8_t, int8_t, float, double, bool>>
static inline T ToLittleEndian(T value) {
return value;
}
#else
template <typename T, typename = internal::EnableIfIsOneOf<
T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
uint8_t, int8_t, float, double, bool>>
static inline T ToBigEndian(T value) {
return value;
}
template <typename T, typename = internal::EnableIfIsOneOf<
T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
uint8_t, int8_t, float, double, bool>>
static inline T ToLittleEndian(T value) {
return ByteSwap(value);
}
#endif
// Convert from big/little endian format to the machine's native endian format.
#if ARROW_LITTLE_ENDIAN
template <typename T, typename = internal::EnableIfIsOneOf<
T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
uint8_t, int8_t, float, double, bool>>
static inline T FromBigEndian(T value) {
return ByteSwap(value);
}
template <typename T, typename = internal::EnableIfIsOneOf<
T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
uint8_t, int8_t, float, double, bool>>
static inline T FromLittleEndian(T value) {
return value;
}
#else
template <typename T, typename = internal::EnableIfIsOneOf<
T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
uint8_t, int8_t, float, double, bool>>
static inline T FromBigEndian(T value) {
return value;
}
template <typename T, typename = internal::EnableIfIsOneOf<
T, int64_t, uint64_t, int32_t, uint32_t, int16_t, uint16_t,
uint8_t, int8_t, float, double, bool>>
static inline T FromLittleEndian(T value) {
return ByteSwap(value);
}
#endif
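// Usage sketch (illustrative): convert a value to big-endian wire order and back.
// On a little-endian host both calls byte-swap; on a big-endian host both are no-ops.
//
//   uint32_t wire = ToBigEndian(uint32_t{0x01020304});
//   uint32_t host = FromBigEndian(wire);  // 0x01020304 again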
// Handle endianness at *word* granularity (keep individual array elements untouched)
namespace little_endian {
namespace detail {
// Read a native endian array as little endian
template <typename T, size_t N>
struct Reader {
const std::array<T, N>& native_array;
explicit Reader(const std::array<T, N>& native_array) : native_array(native_array) {}
const T& operator[](size_t i) const {
return native_array[ARROW_LITTLE_ENDIAN ? i : N - 1 - i];
}
};
// Read/write a native endian array as little endian
template <typename T, size_t N>
struct Writer {
std::array<T, N>* native_array;
explicit Writer(std::array<T, N>* native_array) : native_array(native_array) {}
const T& operator[](size_t i) const {
return (*native_array)[ARROW_LITTLE_ENDIAN ? i : N - 1 - i];
}
T& operator[](size_t i) { return (*native_array)[ARROW_LITTLE_ENDIAN ? i : N - 1 - i]; }
};
} // namespace detail
// Construct an array reader and try to deduce the template arguments
template <typename T, size_t N>
static inline detail::Reader<T, N> Make(const std::array<T, N>& native_array) {
return detail::Reader<T, N>(native_array);
}
// Construct an array writer and try to deduce the template arguments
template <typename T, size_t N>
static inline detail::Writer<T, N> Make(std::array<T, N>* native_array) {
return detail::Writer<T, N>(native_array);
}
// Convert little endian array to native endian
template <typename T, size_t N>
static inline std::array<T, N> ToNative(std::array<T, N> array) {
if (!ARROW_LITTLE_ENDIAN) {
std::reverse(array.begin(), array.end());
}
return array;
}
// Convert native endian array to little endian
template <typename T, size_t N>
static inline std::array<T, N> FromNative(std::array<T, N> array) {
return ToNative(array);
}
} // namespace little_endian
} // namespace bit_util
} // namespace arrow

View File

@@ -0,0 +1,635 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This is a private header for number-to-string formatting utilities
#pragma once
#include <array>
#include <cassert>
#include <chrono>
#include <limits>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/double_conversion.h"
#include "arrow/util/macros.h"
#include "arrow/util/string.h"
#include "arrow/util/time.h"
#include "arrow/util/visibility.h"
#include "arrow/vendored/datetime.h"
namespace arrow {
namespace internal {
/// \brief The entry point for conversion to strings.
template <typename ARROW_TYPE, typename Enable = void>
class StringFormatter;
template <typename T>
struct is_formattable {
template <typename U, typename = typename StringFormatter<U>::value_type>
static std::true_type Test(U*);
template <typename U>
static std::false_type Test(...);
static constexpr bool value = decltype(Test<T>(NULLPTR))::value;
};
template <typename T, typename R = void>
using enable_if_formattable = enable_if_t<is_formattable<T>::value, R>;
template <typename Appender>
using Return = decltype(std::declval<Appender>()(std::string_view{}));
/////////////////////////////////////////////////////////////////////////
// Boolean formatting
template <>
class StringFormatter<BooleanType> {
public:
explicit StringFormatter(const DataType* = NULLPTR) {}
using value_type = bool;
template <typename Appender>
Return<Appender> operator()(bool value, Appender&& append) {
if (value) {
const char string[] = "true";
return append(std::string_view(string));
} else {
const char string[] = "false";
return append(std::string_view(string));
}
}
};
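// Usage sketch (illustrative): formatters hand their result to a caller-supplied
// appender, so the caller decides how to consume the formatted view (copy it, write
// it to a builder, return a Status, ...).
//
//   StringFormatter<BooleanType> format;
//   format(true, [](std::string_view repr) {
//     // repr == "true"
//   });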
/////////////////////////////////////////////////////////////////////////
// Decimals formatting
template <typename ARROW_TYPE>
class DecimalToStringFormatterMixin {
public:
explicit DecimalToStringFormatterMixin(const DataType* type)
: scale_(static_cast<const ARROW_TYPE*>(type)->scale()) {}
using value_type = typename TypeTraits<ARROW_TYPE>::CType;
template <typename Appender>
Return<Appender> operator()(const value_type& value, Appender&& append) {
return append(value.ToString(scale_));
}
private:
int32_t scale_;
};
template <>
class StringFormatter<Decimal128Type>
: public DecimalToStringFormatterMixin<Decimal128Type> {
using DecimalToStringFormatterMixin::DecimalToStringFormatterMixin;
};
template <>
class StringFormatter<Decimal256Type>
: public DecimalToStringFormatterMixin<Decimal256Type> {
using DecimalToStringFormatterMixin::DecimalToStringFormatterMixin;
};
/////////////////////////////////////////////////////////////////////////
// Integer formatting
namespace detail {
// A 2x100 direct table mapping integers in [0..99] to their decimal representations.
ARROW_EXPORT extern const char digit_pairs[];
// Based on fmtlib's format_int class:
// Write digits from right to left into a stack allocated buffer
inline void FormatOneChar(char c, char** cursor) { *--*cursor = c; }
template <typename Int>
void FormatOneDigit(Int value, char** cursor) {
assert(value >= 0 && value <= 9);
FormatOneChar(static_cast<char>('0' + value), cursor);
}
template <typename Int>
void FormatTwoDigits(Int value, char** cursor) {
assert(value >= 0 && value <= 99);
auto digit_pair = &digit_pairs[value * 2];
FormatOneChar(digit_pair[1], cursor);
FormatOneChar(digit_pair[0], cursor);
}
template <typename Int>
void FormatAllDigits(Int value, char** cursor) {
assert(value >= 0);
while (value >= 100) {
FormatTwoDigits(value % 100, cursor);
value /= 100;
}
if (value >= 10) {
FormatTwoDigits(value, cursor);
} else {
FormatOneDigit(value, cursor);
}
}
template <typename Int>
void FormatAllDigitsLeftPadded(Int value, size_t pad, char pad_char, char** cursor) {
auto end = *cursor - pad;
FormatAllDigits(value, cursor);
while (*cursor > end) {
FormatOneChar(pad_char, cursor);
}
}
template <size_t BUFFER_SIZE>
std::string_view ViewDigitBuffer(const std::array<char, BUFFER_SIZE>& buffer,
char* cursor) {
auto buffer_end = buffer.data() + BUFFER_SIZE;
return {cursor, static_cast<size_t>(buffer_end - cursor)};
}
template <typename Int, typename UInt = typename std::make_unsigned<Int>::type>
constexpr UInt Abs(Int value) {
return value < 0 ? ~static_cast<UInt>(value) + 1 : static_cast<UInt>(value);
}
template <typename Int>
constexpr size_t Digits10(Int value) {
return value <= 9 ? 1 : Digits10(value / 10) + 1;
}
} // namespace detail
template <typename ARROW_TYPE>
class IntToStringFormatterMixin {
public:
explicit IntToStringFormatterMixin(const DataType* = NULLPTR) {}
using value_type = typename ARROW_TYPE::c_type;
template <typename Appender>
Return<Appender> operator()(value_type value, Appender&& append) {
constexpr size_t buffer_size =
detail::Digits10(std::numeric_limits<value_type>::max()) + 1;
std::array<char, buffer_size> buffer;
char* cursor = buffer.data() + buffer_size;
detail::FormatAllDigits(detail::Abs(value), &cursor);
if (value < 0) {
detail::FormatOneChar('-', &cursor);
}
return append(detail::ViewDigitBuffer(buffer, cursor));
}
};
template <>
class StringFormatter<Int8Type> : public IntToStringFormatterMixin<Int8Type> {
using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
template <>
class StringFormatter<Int16Type> : public IntToStringFormatterMixin<Int16Type> {
using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
template <>
class StringFormatter<Int32Type> : public IntToStringFormatterMixin<Int32Type> {
using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
template <>
class StringFormatter<Int64Type> : public IntToStringFormatterMixin<Int64Type> {
using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
template <>
class StringFormatter<UInt8Type> : public IntToStringFormatterMixin<UInt8Type> {
using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
template <>
class StringFormatter<UInt16Type> : public IntToStringFormatterMixin<UInt16Type> {
using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
template <>
class StringFormatter<UInt32Type> : public IntToStringFormatterMixin<UInt32Type> {
using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
template <>
class StringFormatter<UInt64Type> : public IntToStringFormatterMixin<UInt64Type> {
using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
/////////////////////////////////////////////////////////////////////////
// Floating-point formatting
class ARROW_EXPORT FloatToStringFormatter {
public:
FloatToStringFormatter();
FloatToStringFormatter(int flags, const char* inf_symbol, const char* nan_symbol,
char exp_character, int decimal_in_shortest_low,
int decimal_in_shortest_high,
int max_leading_padding_zeroes_in_precision_mode,
int max_trailing_padding_zeroes_in_precision_mode);
~FloatToStringFormatter();
// Returns the number of characters written
int FormatFloat(float v, char* out_buffer, int out_size);
int FormatFloat(double v, char* out_buffer, int out_size);
protected:
struct Impl;
std::unique_ptr<Impl> impl_;
};
template <typename ARROW_TYPE>
class FloatToStringFormatterMixin : public FloatToStringFormatter {
public:
using value_type = typename ARROW_TYPE::c_type;
static constexpr int buffer_size = 50;
explicit FloatToStringFormatterMixin(const DataType* = NULLPTR) {}
FloatToStringFormatterMixin(int flags, const char* inf_symbol, const char* nan_symbol,
char exp_character, int decimal_in_shortest_low,
int decimal_in_shortest_high,
int max_leading_padding_zeroes_in_precision_mode,
int max_trailing_padding_zeroes_in_precision_mode)
: FloatToStringFormatter(flags, inf_symbol, nan_symbol, exp_character,
decimal_in_shortest_low, decimal_in_shortest_high,
max_leading_padding_zeroes_in_precision_mode,
max_trailing_padding_zeroes_in_precision_mode) {}
template <typename Appender>
Return<Appender> operator()(value_type value, Appender&& append) {
char buffer[buffer_size];
int size = FormatFloat(value, buffer, buffer_size);
return append(std::string_view(buffer, size));
}
};
template <>
class StringFormatter<FloatType> : public FloatToStringFormatterMixin<FloatType> {
public:
using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
};
template <>
class StringFormatter<DoubleType> : public FloatToStringFormatterMixin<DoubleType> {
public:
using FloatToStringFormatterMixin::FloatToStringFormatterMixin;
};
/////////////////////////////////////////////////////////////////////////
// Temporal formatting
namespace detail {
constexpr size_t BufferSizeYYYY_MM_DD() {
return 1 + detail::Digits10(99999) + 1 + detail::Digits10(12) + 1 +
detail::Digits10(31);
}
inline void FormatYYYY_MM_DD(arrow_vendored::date::year_month_day ymd, char** cursor) {
FormatTwoDigits(static_cast<unsigned>(ymd.day()), cursor);
FormatOneChar('-', cursor);
FormatTwoDigits(static_cast<unsigned>(ymd.month()), cursor);
FormatOneChar('-', cursor);
auto year = static_cast<int>(ymd.year());
const auto is_neg_year = year < 0;
year = std::abs(year);
assert(year <= 99999);
FormatTwoDigits(year % 100, cursor);
year /= 100;
FormatTwoDigits(year % 100, cursor);
if (year >= 100) {
FormatOneDigit(year / 100, cursor);
}
if (is_neg_year) {
FormatOneChar('-', cursor);
}
}
template <typename Duration>
constexpr size_t BufferSizeHH_MM_SS() {
return detail::Digits10(23) + 1 + detail::Digits10(59) + 1 + detail::Digits10(59) + 1 +
detail::Digits10(Duration::period::den) - 1;
}
template <typename Duration>
void FormatHH_MM_SS(arrow_vendored::date::hh_mm_ss<Duration> hms, char** cursor) {
constexpr size_t subsecond_digits = Digits10(Duration::period::den) - 1;
if (subsecond_digits != 0) {
FormatAllDigitsLeftPadded(hms.subseconds().count(), subsecond_digits, '0', cursor);
FormatOneChar('.', cursor);
}
FormatTwoDigits(hms.seconds().count(), cursor);
FormatOneChar(':', cursor);
FormatTwoDigits(hms.minutes().count(), cursor);
FormatOneChar(':', cursor);
FormatTwoDigits(hms.hours().count(), cursor);
}
// Some out-of-bound datetime values would result in erroneous printing
// because of silent integer wraparound in the `arrow_vendored::date` library.
//
// To avoid such misprinting, we must therefore check the bounds explicitly.
// The bounds correspond to start of year -32767 and end of year 32767,
// respectively (-32768 is an invalid year value in `arrow_vendored::date`).
//
// Note these values are the same as documented for C++20:
// https://en.cppreference.com/w/cpp/chrono/year_month_day/operator_days
template <typename Unit>
bool IsDateTimeInRange(Unit duration) {
constexpr Unit kMinIncl =
std::chrono::duration_cast<Unit>(arrow_vendored::date::days{-12687428});
constexpr Unit kMaxExcl =
std::chrono::duration_cast<Unit>(arrow_vendored::date::days{11248738});
return duration >= kMinIncl && duration < kMaxExcl;
}
// IsDateTimeInRange() specialization for nanoseconds: a 64-bit number of
// nanoseconds cannot represent years outside of the [-32767, 32767]
// range, and the {kMinIncl, kMaxExcl} constants above would overflow.
constexpr bool IsDateTimeInRange(std::chrono::nanoseconds duration) { return true; }
template <typename Unit>
bool IsTimeInRange(Unit duration) {
constexpr Unit kMinIncl = std::chrono::duration_cast<Unit>(std::chrono::seconds{0});
constexpr Unit kMaxExcl = std::chrono::duration_cast<Unit>(std::chrono::seconds{86400});
return duration >= kMinIncl && duration < kMaxExcl;
}
template <typename RawValue, typename Appender>
Return<Appender> FormatOutOfRange(RawValue&& raw_value, Appender&& append) {
// XXX locale-sensitive but good enough for now
std::string formatted = "<value out of range: " + ToChars(raw_value) + ">";
return append(std::move(formatted));
}
const auto kEpoch = arrow_vendored::date::sys_days{arrow_vendored::date::jan / 1 / 1970};
} // namespace detail
template <>
class StringFormatter<DurationType> : public IntToStringFormatterMixin<DurationType> {
using IntToStringFormatterMixin::IntToStringFormatterMixin;
};
class DateToStringFormatterMixin {
public:
explicit DateToStringFormatterMixin(const DataType* = NULLPTR) {}
protected:
template <typename Appender>
Return<Appender> FormatDays(arrow_vendored::date::days since_epoch, Appender&& append) {
arrow_vendored::date::sys_days timepoint_days{since_epoch};
constexpr size_t buffer_size = detail::BufferSizeYYYY_MM_DD();
std::array<char, buffer_size> buffer;
char* cursor = buffer.data() + buffer_size;
detail::FormatYYYY_MM_DD(arrow_vendored::date::year_month_day{timepoint_days},
&cursor);
return append(detail::ViewDigitBuffer(buffer, cursor));
}
};
template <>
class StringFormatter<Date32Type> : public DateToStringFormatterMixin {
public:
using value_type = typename Date32Type::c_type;
using DateToStringFormatterMixin::DateToStringFormatterMixin;
template <typename Appender>
Return<Appender> operator()(value_type value, Appender&& append) {
const auto since_epoch = arrow_vendored::date::days{value};
if (!ARROW_PREDICT_TRUE(detail::IsDateTimeInRange(since_epoch))) {
return detail::FormatOutOfRange(value, append);
}
return FormatDays(since_epoch, std::forward<Appender>(append));
}
};
template <>
class StringFormatter<Date64Type> : public DateToStringFormatterMixin {
public:
using value_type = typename Date64Type::c_type;
using DateToStringFormatterMixin::DateToStringFormatterMixin;
template <typename Appender>
Return<Appender> operator()(value_type value, Appender&& append) {
const auto since_epoch = std::chrono::milliseconds{value};
if (!ARROW_PREDICT_TRUE(detail::IsDateTimeInRange(since_epoch))) {
return detail::FormatOutOfRange(value, append);
}
return FormatDays(std::chrono::duration_cast<arrow_vendored::date::days>(since_epoch),
std::forward<Appender>(append));
}
};
template <>
class StringFormatter<TimestampType> {
public:
using value_type = int64_t;
explicit StringFormatter(const DataType* type)
: unit_(checked_cast<const TimestampType&>(*type).unit()) {}
template <typename Duration, typename Appender>
Return<Appender> operator()(Duration, value_type value, Appender&& append) {
using arrow_vendored::date::days;
const Duration since_epoch{value};
if (!ARROW_PREDICT_TRUE(detail::IsDateTimeInRange(since_epoch))) {
return detail::FormatOutOfRange(value, append);
}
const auto timepoint = detail::kEpoch + since_epoch;
// Round days towards zero
// (the naive approach of using arrow_vendored::date::floor() would
// result in UB for very large negative timestamps, similarly as
// https://github.com/HowardHinnant/date/issues/696)
auto timepoint_days = std::chrono::time_point_cast<days>(timepoint);
Duration since_midnight;
if (timepoint_days <= timepoint) {
// Year >= 1970
since_midnight = timepoint - timepoint_days;
} else {
// Year < 1970
since_midnight = days(1) - (timepoint_days - timepoint);
timepoint_days -= days(1);
}
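// For example, a value of -1 second truncates to day 0 (1970-01-01), which is
// greater than the timepoint, so the "Year < 1970" branch is taken and the
// output ends up being "1969-12-31 23:59:59".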
constexpr size_t buffer_size =
detail::BufferSizeYYYY_MM_DD() + 1 + detail::BufferSizeHH_MM_SS<Duration>();
std::array<char, buffer_size> buffer;
char* cursor = buffer.data() + buffer_size;
detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
detail::FormatOneChar(' ', &cursor);
detail::FormatYYYY_MM_DD(timepoint_days, &cursor);
return append(detail::ViewDigitBuffer(buffer, cursor));
}
template <typename Appender>
Return<Appender> operator()(value_type value, Appender&& append) {
return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
}
private:
TimeUnit::type unit_;
};
template <typename T>
class StringFormatter<T, enable_if_time<T>> {
public:
using value_type = typename T::c_type;
explicit StringFormatter(const DataType* type)
: unit_(checked_cast<const T&>(*type).unit()) {}
template <typename Duration, typename Appender>
Return<Appender> operator()(Duration, value_type count, Appender&& append) {
const Duration since_midnight{count};
if (!ARROW_PREDICT_TRUE(detail::IsTimeInRange(since_midnight))) {
return detail::FormatOutOfRange(count, append);
}
constexpr size_t buffer_size = detail::BufferSizeHH_MM_SS<Duration>();
std::array<char, buffer_size> buffer;
char* cursor = buffer.data() + buffer_size;
detail::FormatHH_MM_SS(arrow_vendored::date::make_time(since_midnight), &cursor);
return append(detail::ViewDigitBuffer(buffer, cursor));
}
template <typename Appender>
Return<Appender> operator()(value_type value, Appender&& append) {
return util::VisitDuration(unit_, *this, value, std::forward<Appender>(append));
}
private:
TimeUnit::type unit_;
};
template <>
class StringFormatter<MonthIntervalType> {
public:
using value_type = MonthIntervalType::c_type;
explicit StringFormatter(const DataType*) {}
template <typename Appender>
Return<Appender> operator()(value_type interval, Appender&& append) {
constexpr size_t buffer_size =
/*'m'*/ 3 + /*negative signs*/ 1 +
/*months*/ detail::Digits10(std::numeric_limits<value_type>::max());
std::array<char, buffer_size> buffer;
char* cursor = buffer.data() + buffer_size;
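// Characters are emitted back to front, so e.g. 15 renders as "15M" and -3 as "-3M".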
detail::FormatOneChar('M', &cursor);
detail::FormatAllDigits(detail::Abs(interval), &cursor);
if (interval < 0) detail::FormatOneChar('-', &cursor);
return append(detail::ViewDigitBuffer(buffer, cursor));
}
};
template <>
class StringFormatter<DayTimeIntervalType> {
public:
using value_type = DayTimeIntervalType::DayMilliseconds;
explicit StringFormatter(const DataType*) {}
template <typename Appender>
Return<Appender> operator()(value_type interval, Appender&& append) {
constexpr size_t buffer_size =
/*d, ms*/ 3 + /*negative signs*/ 2 +
/*days/milliseconds*/ 2 * detail::Digits10(std::numeric_limits<int32_t>::max());
std::array<char, buffer_size> buffer;
char* cursor = buffer.data() + buffer_size;
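// Emitted back to front as "<days>d<milliseconds>ms", e.g. {1, -100} renders as "1d-100ms".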
detail::FormatOneChar('s', &cursor);
detail::FormatOneChar('m', &cursor);
detail::FormatAllDigits(detail::Abs(interval.milliseconds), &cursor);
if (interval.milliseconds < 0) detail::FormatOneChar('-', &cursor);
detail::FormatOneChar('d', &cursor);
detail::FormatAllDigits(detail::Abs(interval.days), &cursor);
if (interval.days < 0) detail::FormatOneChar('-', &cursor);
return append(detail::ViewDigitBuffer(buffer, cursor));
}
};
template <>
class StringFormatter<MonthDayNanoIntervalType> {
public:
using value_type = MonthDayNanoIntervalType::MonthDayNanos;
explicit StringFormatter(const DataType*) {}
template <typename Appender>
Return<Appender> operator()(value_type interval, Appender&& append) {
constexpr size_t buffer_size =
/*m, d, ns*/ 4 + /*negative signs*/ 3 +
/*months/days*/ 2 * detail::Digits10(std::numeric_limits<int32_t>::max()) +
/*nanoseconds*/ detail::Digits10(std::numeric_limits<int64_t>::max());
std::array<char, buffer_size> buffer;
char* cursor = buffer.data() + buffer_size;
detail::FormatOneChar('s', &cursor);
detail::FormatOneChar('n', &cursor);
detail::FormatAllDigits(detail::Abs(interval.nanoseconds), &cursor);
if (interval.nanoseconds < 0) detail::FormatOneChar('-', &cursor);
detail::FormatOneChar('d', &cursor);
detail::FormatAllDigits(detail::Abs(interval.days), &cursor);
if (interval.days < 0) detail::FormatOneChar('-', &cursor);
detail::FormatOneChar('M', &cursor);
detail::FormatAllDigits(detail::Abs(interval.months), &cursor);
if (interval.months < 0) detail::FormatOneChar('-', &cursor);
return append(detail::ViewDigitBuffer(buffer, cursor));
}
};
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,160 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <tuple>
#include <type_traits>
#include "arrow/result.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace internal {
struct Empty {
static Result<Empty> ToResult(Status s) {
if (ARROW_PREDICT_TRUE(s.ok())) {
return Empty{};
}
return s;
}
};
/// Helper struct for examining lambdas and other callables.
/// TODO(ARROW-12655) support function pointers
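/// A minimal usage sketch (illustrative):
///
///   auto fn = [](int i, double d) { return i + d; };
///   using F = decltype(fn);
///   static_assert(std::is_same<call_traits::return_type<F>, double>::value, "");
///   static_assert(call_traits::argument_count<F>::value == 2, "");
///   static_assert(std::is_same<call_traits::argument_type<1, F>, double>::value, "");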
struct call_traits {
public:
template <typename R, typename... A>
static std::false_type is_overloaded_impl(R(A...));
template <typename F>
static std::false_type is_overloaded_impl(decltype(&F::operator())*);
template <typename F>
static std::true_type is_overloaded_impl(...);
template <typename F, typename R, typename... A>
static R return_type_impl(R (F::*)(A...));
template <typename F, typename R, typename... A>
static R return_type_impl(R (F::*)(A...) const);
template <std::size_t I, typename F, typename R, typename... A>
static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
R (F::*)(A...));
template <std::size_t I, typename F, typename R, typename... A>
static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
R (F::*)(A...) const);
template <std::size_t I, typename F, typename R, typename... A>
static typename std::tuple_element<I, std::tuple<A...>>::type argument_type_impl(
R (F::*)(A...) &&);
template <typename F, typename R, typename... A>
static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...));
template <typename F, typename R, typename... A>
static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...)
const);
template <typename F, typename R, typename... A>
static std::integral_constant<int, sizeof...(A)> argument_count_impl(R (F::*)(A...) &&);
/// bool constant indicating whether F is a callable with more than one possible
/// signature. Will be true_type for objects which define multiple operator() or which
/// define a template operator()
template <typename F>
using is_overloaded =
decltype(is_overloaded_impl<typename std::decay<F>::type>(NULLPTR));
template <typename F, typename T = void>
using enable_if_overloaded = typename std::enable_if<is_overloaded<F>::value, T>::type;
template <typename F, typename T = void>
using disable_if_overloaded =
typename std::enable_if<!is_overloaded<F>::value, T>::type;
/// If F is not overloaded, the argument types of its call operator can be
/// extracted via call_traits::argument_type<Index, F>
template <std::size_t I, typename F>
using argument_type = decltype(argument_type_impl<I>(&std::decay<F>::type::operator()));
template <typename F>
using argument_count = decltype(argument_count_impl(&std::decay<F>::type::operator()));
template <typename F>
using return_type = decltype(return_type_impl(&std::decay<F>::type::operator()));
template <typename F, typename T, typename RT = T>
using enable_if_return =
typename std::enable_if<std::is_same<return_type<F>, T>::value, RT>;
template <typename T, typename R = void>
using enable_if_empty = typename std::enable_if<std::is_same<T, Empty>::value, R>::type;
template <typename T, typename R = void>
using enable_if_not_empty =
typename std::enable_if<!std::is_same<T, Empty>::value, R>::type;
};
/// A type erased callable object which may only be invoked once.
/// It can be constructed from any lambda which matches the provided call signature.
/// Invoking it results in destruction of the lambda, freeing any state/references
/// immediately. Invoking a default constructed FnOnce or one which has already been
/// invoked will segfault.
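/// A minimal usage sketch (illustrative):
///
///   FnOnce<int(int)> add_one = [](int x) { return x + 1; };
///   int result = std::move(add_one)(41);  // result == 42; the stored lambda is destroyed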
template <typename Signature>
class FnOnce;
template <typename R, typename... A>
class FnOnce<R(A...)> {
public:
FnOnce() = default;
template <typename Fn,
typename = typename std::enable_if<std::is_convertible<
decltype(std::declval<Fn&&>()(std::declval<A>()...)), R>::value>::type>
FnOnce(Fn fn) : impl_(new FnImpl<Fn>(std::move(fn))) { // NOLINT runtime/explicit
}
explicit operator bool() const { return impl_ != NULLPTR; }
R operator()(A... a) && {
auto bye = std::move(impl_);
return bye->invoke(std::forward<A&&>(a)...);
}
private:
struct Impl {
virtual ~Impl() = default;
virtual R invoke(A&&... a) = 0;
};
template <typename Fn>
struct FnImpl : Impl {
explicit FnImpl(Fn fn) : fn_(std::move(fn)) {}
R invoke(A&&... a) override { return std::move(fn_)(std::forward<A&&>(a)...); }
Fn fn_;
};
std::unique_ptr<Impl> impl_;
};
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,882 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic>
#include <cmath>
#include <functional>
#include <memory>
#include <optional>
#include <type_traits>
#include <utility>
#include <vector>
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
#include "arrow/util/config.h"
#include "arrow/util/functional.h"
#include "arrow/util/macros.h"
#include "arrow/util/tracing.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
template <typename>
struct EnsureFuture;
namespace detail {
template <typename>
struct is_future : std::false_type {};
template <typename T>
struct is_future<Future<T>> : std::true_type {};
template <typename Signature, typename Enable = void>
struct result_of;
template <typename Fn, typename... A>
struct result_of<Fn(A...),
internal::void_t<decltype(std::declval<Fn>()(std::declval<A>()...))>> {
using type = decltype(std::declval<Fn>()(std::declval<A>()...));
};
template <typename Signature>
using result_of_t = typename result_of<Signature>::type;
// Helper to find the synchronous counterpart for a Future
template <typename T>
struct SyncType {
using type = Result<T>;
};
template <>
struct SyncType<internal::Empty> {
using type = Status;
};
template <typename Fn>
using first_arg_is_status =
std::is_same<typename std::decay<internal::call_traits::argument_type<0, Fn>>::type,
Status>;
template <typename Fn, typename Then, typename Else,
typename Count = internal::call_traits::argument_count<Fn>>
using if_has_no_args = typename std::conditional<Count::value == 0, Then, Else>::type;
/// Creates a callback that can be added to a future to mark a `dest` future finished
template <typename Source, typename Dest, bool SourceEmpty = Source::is_empty,
bool DestEmpty = Dest::is_empty>
struct MarkNextFinished {};
/// If the source and dest are both empty we can pass on the status
template <typename Source, typename Dest>
struct MarkNextFinished<Source, Dest, true, true> {
void operator()(const Status& status) && { next.MarkFinished(status); }
Dest next;
};
/// If the source is not empty but the dest is then we can take the
/// status out of the result
template <typename Source, typename Dest>
struct MarkNextFinished<Source, Dest, false, true> {
void operator()(const Result<typename Source::ValueType>& res) && {
next.MarkFinished(internal::Empty::ToResult(res.status()));
}
Dest next;
};
/// If neither are empty we pass on the result
template <typename Source, typename Dest>
struct MarkNextFinished<Source, Dest, false, false> {
void operator()(const Result<typename Source::ValueType>& res) && {
next.MarkFinished(res);
}
Dest next;
};
/// Helper that contains information about how to apply a continuation
struct ContinueFuture {
template <typename Return>
struct ForReturnImpl;
template <typename Return>
using ForReturn = typename ForReturnImpl<Return>::type;
template <typename Signature>
using ForSignature = ForReturn<result_of_t<Signature>>;
// If the callback returns void then we return a Future<> that always finishes OK.
template <typename ContinueFunc, typename... Args,
typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
typename NextFuture = ForReturn<ContinueResult>>
typename std::enable_if<std::is_void<ContinueResult>::value>::type operator()(
NextFuture next, ContinueFunc&& f, Args&&... a) const {
std::forward<ContinueFunc>(f)(std::forward<Args>(a)...);
next.MarkFinished();
}
/// If the callback returns a non-future then we return Future<T>
/// and mark the future finished with the callback result. It will get promoted
/// to Result<T> as part of MarkFinished if it isn't already.
///
/// If the callback returns Status and we return Future<> then also send the callback
/// result as-is to the destination future.
template <typename ContinueFunc, typename... Args,
typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
typename NextFuture = ForReturn<ContinueResult>>
typename std::enable_if<
!std::is_void<ContinueResult>::value && !is_future<ContinueResult>::value &&
(!NextFuture::is_empty || std::is_same<ContinueResult, Status>::value)>::type
operator()(NextFuture next, ContinueFunc&& f, Args&&... a) const {
next.MarkFinished(std::forward<ContinueFunc>(f)(std::forward<Args>(a)...));
}
/// If the callback returns a Result and the next future is Future<> then we mark
/// the future finished with the callback result.
///
/// It may seem odd that the next future is Future<> when the callback returns a
/// result but this can occur if the OnFailure callback returns a result while the
/// OnSuccess callback is void/Status (e.g. you would get this calling the one-arg
/// version of Then with an OnSuccess callback that returns void)
template <typename ContinueFunc, typename... Args,
typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
typename NextFuture = ForReturn<ContinueResult>>
typename std::enable_if<!std::is_void<ContinueResult>::value &&
!is_future<ContinueResult>::value && NextFuture::is_empty &&
!std::is_same<ContinueResult, Status>::value>::type
operator()(NextFuture next, ContinueFunc&& f, Args&&... a) const {
next.MarkFinished(std::forward<ContinueFunc>(f)(std::forward<Args>(a)...).status());
}
/// If the callback returns a Future<T> then we return Future<T>. We create a new
/// future and add a callback to the future given to us by the user that forwards the
/// result to the future we just created
template <typename ContinueFunc, typename... Args,
typename ContinueResult = result_of_t<ContinueFunc && (Args && ...)>,
typename NextFuture = ForReturn<ContinueResult>>
typename std::enable_if<is_future<ContinueResult>::value>::type operator()(
NextFuture next, ContinueFunc&& f, Args&&... a) const {
ContinueResult signal_to_complete_next =
std::forward<ContinueFunc>(f)(std::forward<Args>(a)...);
MarkNextFinished<ContinueResult, NextFuture> callback{std::move(next)};
signal_to_complete_next.AddCallback(std::move(callback));
}
/// Helpers to conditionally ignore arguments to ContinueFunc
template <typename ContinueFunc, typename NextFuture, typename... Args>
void IgnoringArgsIf(std::true_type, NextFuture&& next, ContinueFunc&& f,
Args&&...) const {
operator()(std::forward<NextFuture>(next), std::forward<ContinueFunc>(f));
}
template <typename ContinueFunc, typename NextFuture, typename... Args>
void IgnoringArgsIf(std::false_type, NextFuture&& next, ContinueFunc&& f,
Args&&... a) const {
operator()(std::forward<NextFuture>(next), std::forward<ContinueFunc>(f),
std::forward<Args>(a)...);
}
};
/// Helper struct which tells us what kind of Future gets returned from `Then` based on
/// the return type of the OnSuccess callback
template <>
struct ContinueFuture::ForReturnImpl<void> {
using type = Future<>;
};
template <>
struct ContinueFuture::ForReturnImpl<Status> {
using type = Future<>;
};
template <typename R>
struct ContinueFuture::ForReturnImpl {
using type = Future<R>;
};
template <typename T>
struct ContinueFuture::ForReturnImpl<Result<T>> {
using type = Future<T>;
};
template <typename T>
struct ContinueFuture::ForReturnImpl<Future<T>> {
using type = Future<T>;
};
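// In summary, the mapping is:
//   ForReturn<void>        -> Future<>
//   ForReturn<Status>      -> Future<>
//   ForReturn<T>           -> Future<T>
//   ForReturn<Result<T>>   -> Future<T>
//   ForReturn<Future<T>>   -> Future<T>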
} // namespace detail
/// A Future's execution or completion status
enum class FutureState : int8_t { PENDING, SUCCESS, FAILURE };
inline bool IsFutureFinished(FutureState state) { return state != FutureState::PENDING; }
/// \brief Describe whether the callback should be scheduled or run synchronously
enum class ShouldSchedule {
/// Always run the callback synchronously (the default)
Never = 0,
/// Schedule a new task only if the future is not finished when the
/// callback is added
IfUnfinished = 1,
/// Always schedule the callback as a new task
Always = 2,
/// Schedule a new task only if it would run on an executor other than
/// the specified executor.
IfDifferentExecutor = 3,
};
/// \brief Options that control how a continuation is run
struct CallbackOptions {
/// Describe whether the callback should be run synchronously or scheduled
ShouldSchedule should_schedule = ShouldSchedule::Never;
/// If the callback is scheduled then this is the executor it should be scheduled
/// on. If this is NULL then should_schedule must be Never
internal::Executor* executor = NULLPTR;
static CallbackOptions Defaults() { return {}; }
};
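// A minimal usage sketch (illustrative; `fut`, `on_done` and `executor` are hypothetical):
//
//   CallbackOptions opts;
//   opts.should_schedule = ShouldSchedule::IfUnfinished;
//   opts.executor = executor;  // an internal::Executor*
//   fut.AddCallback(std::move(on_done), opts);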
// Untyped private implementation
class ARROW_EXPORT FutureImpl : public std::enable_shared_from_this<FutureImpl> {
public:
FutureImpl();
virtual ~FutureImpl() = default;
FutureState state() { return state_.load(); }
static std::unique_ptr<FutureImpl> Make();
static std::unique_ptr<FutureImpl> MakeFinished(FutureState state);
#ifdef ARROW_WITH_OPENTELEMETRY
void SetSpan(util::tracing::Span* span) { span_ = span; }
#endif
// Future API
void MarkFinished();
void MarkFailed();
void Wait();
bool Wait(double seconds);
template <typename ValueType>
Result<ValueType>* CastResult() const {
return static_cast<Result<ValueType>*>(result_.get());
}
using Callback = internal::FnOnce<void(const FutureImpl& impl)>;
void AddCallback(Callback callback, CallbackOptions opts);
bool TryAddCallback(const std::function<Callback()>& callback_factory,
CallbackOptions opts);
std::atomic<FutureState> state_{FutureState::PENDING};
// Type erased storage for arbitrary results
// XXX small objects could be stored inline instead of boxed in a pointer
using Storage = std::unique_ptr<void, void (*)(void*)>;
Storage result_{NULLPTR, NULLPTR};
struct CallbackRecord {
Callback callback;
CallbackOptions options;
};
std::vector<CallbackRecord> callbacks_;
#ifdef ARROW_WITH_OPENTELEMETRY
util::tracing::Span* span_ = NULLPTR;
#endif
};
// ---------------------------------------------------------------------
// Public API
/// \brief EXPERIMENTAL A std::future-like class with more functionality.
///
/// A Future represents the results of a past or future computation.
/// The Future API has two sides: a producer side and a consumer side.
///
/// The producer API allows creating a Future and setting its result or
/// status, possibly after running a computation function.
///
/// The consumer API allows querying a Future's current state, wait for it
/// to complete, and composing futures with callbacks.
template <typename T>
class [[nodiscard]] Future {
public:
using ValueType = T;
using SyncType = typename detail::SyncType<T>::type;
static constexpr bool is_empty = std::is_same<T, internal::Empty>::value;
// The default constructor creates an invalid Future. Use Future::Make()
// for a valid Future. This constructor is mostly for the convenience
// of being able to presize a vector of Futures.
Future() = default;
#ifdef ARROW_WITH_OPENTELEMETRY
void SetSpan(util::tracing::Span* span) { impl_->SetSpan(span); }
#endif
// Consumer API
bool is_valid() const { return impl_ != NULLPTR; }
/// \brief Return the Future's current state
///
/// A return value of PENDING is only indicative, as the Future can complete
/// concurrently. A return value of FAILURE or SUCCESS is definitive, though.
FutureState state() const {
CheckValid();
return impl_->state();
}
/// \brief Whether the Future is finished
///
/// A false return value is only indicative, as the Future can complete
/// concurrently. A true return value is definitive, though.
bool is_finished() const {
CheckValid();
return IsFutureFinished(impl_->state());
}
/// \brief Wait for the Future to complete and return its Result
const Result<ValueType>& result() const& {
Wait();
return *GetResult();
}
/// \brief Returns an rvalue to the result. This method is potentially unsafe
///
/// The future is not the unique owner of the result, copies of a future will
/// also point to the same result. You must make sure that no other copies
/// of the future exist. Attempts to add callbacks after you move the result
/// will result in undefined behavior.
Result<ValueType>&& MoveResult() {
Wait();
return std::move(*GetResult());
}
/// \brief Wait for the Future to complete and return its Status
const Status& status() const { return result().status(); }
/// \brief Future<T> is convertible to Future<>, which views only the
/// Status of the original. Marking the returned Future Finished is not supported.
explicit operator Future<>() const {
Future<> status_future;
status_future.impl_ = impl_;
return status_future;
}
/// \brief Wait for the Future to complete
void Wait() const {
CheckValid();
impl_->Wait();
}
/// \brief Wait for the Future to complete, or for the timeout to expire
///
/// `true` is returned if the Future completed, `false` if the timeout expired.
/// Note a `false` value is only indicative, as the Future can complete
/// concurrently.
bool Wait(double seconds) const {
CheckValid();
return impl_->Wait(seconds);
}
// Producer API
/// \brief Producer API: mark Future finished
///
/// The Future's result is set to `res`.
void MarkFinished(Result<ValueType> res) { DoMarkFinished(std::move(res)); }
/// \brief Mark a Future<> completed with the provided Status.
template <typename E = ValueType, typename = typename std::enable_if<
std::is_same<E, internal::Empty>::value>::type>
void MarkFinished(Status s = Status::OK()) {
return DoMarkFinished(E::ToResult(std::move(s)));
}
/// \brief Producer API: instantiate a valid Future
///
/// The Future's state is initialized with PENDING. If you are creating a future with
/// this method you must ensure that future is eventually completed (with success or
/// failure). Creating a future, returning it, and never completing the future can lead
/// to memory leaks (for example, see Loop).
static Future Make() {
Future fut;
fut.impl_ = FutureImpl::Make();
return fut;
}
/// \brief Producer API: instantiate a finished Future
static Future<ValueType> MakeFinished(Result<ValueType> res) {
Future<ValueType> fut;
fut.InitializeFromResult(std::move(res));
return fut;
}
/// \brief Make a finished Future<> with the provided Status.
template <typename E = ValueType, typename = typename std::enable_if<
std::is_same<E, internal::Empty>::value>::type>
static Future<> MakeFinished(Status s = Status::OK()) {
return MakeFinished(E::ToResult(std::move(s)));
}
struct WrapResultyOnComplete {
template <typename OnComplete>
struct Callback {
void operator()(const FutureImpl& impl) && {
std::move(on_complete)(*impl.CastResult<ValueType>());
}
OnComplete on_complete;
};
};
struct WrapStatusyOnComplete {
template <typename OnComplete>
struct Callback {
static_assert(std::is_same<internal::Empty, ValueType>::value,
"Only callbacks for Future<> should accept Status and not Result");
void operator()(const FutureImpl& impl) && {
std::move(on_complete)(impl.CastResult<ValueType>()->status());
}
OnComplete on_complete;
};
};
template <typename OnComplete>
using WrapOnComplete = typename std::conditional<
detail::first_arg_is_status<OnComplete>::value, WrapStatusyOnComplete,
WrapResultyOnComplete>::type::template Callback<OnComplete>;
/// \brief Consumer API: Register a callback to run when this future completes
///
/// The callback should receive the result of the future (const Result<T>&)
/// For a void or statusy future this should be (const Status&)
///
/// There is no guarantee to the order in which callbacks will run. In
/// particular, callbacks added while the future is being marked complete
/// may be executed immediately, ahead of, or even at the same time as, other
/// callbacks that have been previously added.
///
/// WARNING: callbacks may hold arbitrary references, including cyclic references.
/// Since callbacks will only be destroyed after they are invoked, this can lead to
/// memory leaks if a Future is never marked finished (abandoned):
///
/// {
/// auto fut = Future<>::Make();
/// fut.AddCallback([fut]() {});
/// }
///
/// In this example `fut` falls out of scope but is not destroyed because it holds a
/// cyclic reference to itself through the callback.
template <typename OnComplete, typename Callback = WrapOnComplete<OnComplete>>
void AddCallback(OnComplete on_complete,
CallbackOptions opts = CallbackOptions::Defaults()) const {
// We know impl_ will not be dangling when invoking callbacks because at least one
// thread will be waiting for MarkFinished to return. Thus it's safe to keep a
// weak reference to impl_ here
impl_->AddCallback(Callback{std::move(on_complete)}, opts);
}
/// \brief Overload of AddCallback that will return false instead of running
/// synchronously
///
/// This overload will guarantee the callback is never run synchronously. If the future
/// is already finished then it will simply return false. This can be useful to avoid
/// stack overflow in a situation where you have recursive Futures. For an example
/// see the Loop function
///
/// Takes in a callback factory function to allow moving callbacks (the factory function
/// will only be called if the callback can successfully be added)
///
/// Returns true if a callback was actually added and false if the callback failed
/// to add because the future was marked complete.
template <typename CallbackFactory,
typename OnComplete = detail::result_of_t<CallbackFactory()>,
typename Callback = WrapOnComplete<OnComplete>>
bool TryAddCallback(CallbackFactory callback_factory,
CallbackOptions opts = CallbackOptions::Defaults()) const {
return impl_->TryAddCallback([&]() { return Callback{callback_factory()}; }, opts);
}
template <typename OnSuccess, typename OnFailure>
struct ThenOnComplete {
static constexpr bool has_no_args =
internal::call_traits::argument_count<OnSuccess>::value == 0;
using ContinuedFuture = detail::ContinueFuture::ForSignature<
detail::if_has_no_args<OnSuccess, OnSuccess && (), OnSuccess && (const T&)>>;
static_assert(
std::is_same<detail::ContinueFuture::ForSignature<OnFailure && (const Status&)>,
ContinuedFuture>::value,
"OnSuccess and OnFailure must continue with the same future type");
struct DummyOnSuccess {
void operator()(const T&);
};
using OnSuccessArg = typename std::decay<internal::call_traits::argument_type<
0, detail::if_has_no_args<OnSuccess, DummyOnSuccess, OnSuccess>>>::type;
static_assert(
!std::is_same<OnSuccessArg, typename EnsureResult<OnSuccessArg>::type>::value,
"OnSuccess' argument should not be a Result");
void operator()(const Result<T>& result) && {
detail::ContinueFuture continue_future;
if (ARROW_PREDICT_TRUE(result.ok())) {
// move on_failure to a(n immediately destroyed) temporary to free its resources
ARROW_UNUSED(OnFailure(std::move(on_failure)));
continue_future.IgnoringArgsIf(
detail::if_has_no_args<OnSuccess, std::true_type, std::false_type>{},
std::move(next), std::move(on_success), result.ValueOrDie());
} else {
ARROW_UNUSED(OnSuccess(std::move(on_success)));
continue_future(std::move(next), std::move(on_failure), result.status());
}
}
OnSuccess on_success;
OnFailure on_failure;
ContinuedFuture next;
};
template <typename OnSuccess>
struct PassthruOnFailure {
using ContinuedFuture = detail::ContinueFuture::ForSignature<
detail::if_has_no_args<OnSuccess, OnSuccess && (), OnSuccess && (const T&)>>;
Result<typename ContinuedFuture::ValueType> operator()(const Status& s) { return s; }
};
/// \brief Consumer API: Register a continuation to run when this future completes
///
/// The continuation will run in the same thread that called MarkFinished (whatever
/// callback is registered with this function will run before MarkFinished returns).
/// Avoid long-running callbacks in favor of submitting a task to an Executor and
/// returning the future.
///
/// Two callbacks are supported:
/// - OnSuccess, called with the result (const ValueType&) on successful completion.
/// for an empty future this will be called with nothing ()
/// - OnFailure, called with the error (const Status&) on failed completion.
/// This callback is optional and defaults to a passthru of any errors.
///
/// Then() returns a Future whose ValueType is derived from the return type of the
/// callbacks. If a callback returns:
/// - void, a Future<> will be returned which will complete successfully as soon
/// as the callback runs.
/// - Status, a Future<> will be returned which will complete with the returned Status
/// as soon as the callback runs.
/// - V or Result<V>, a Future<V> will be returned which will complete with the result
/// of invoking the callback as soon as the callback runs.
/// - Future<V>, a Future<V> will be returned which will be marked complete when the
/// future returned by the callback completes (and will complete with the same
/// result).
///
/// The continued Future type must be the same for both callbacks.
///
/// Note that OnFailure can swallow errors, allowing continued Futures to successfully
/// complete even if this Future fails.
///
/// If this future is already completed then the callback will be run immediately
/// and the returned future may already be marked complete.
///
/// See AddCallback for general considerations when writing callbacks.
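/// A minimal usage sketch (illustrative; `ReadIntAsync` is a hypothetical producer):
///
///   Future<int> fut = ReadIntAsync();
///   Future<std::string> next = fut.Then(
///       [](const int& value) { return std::to_string(value); },
///       [](const Status& error) { return Result<std::string>(error); });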
template <typename OnSuccess, typename OnFailure = PassthruOnFailure<OnSuccess>,
typename OnComplete = ThenOnComplete<OnSuccess, OnFailure>,
typename ContinuedFuture = typename OnComplete::ContinuedFuture>
ContinuedFuture Then(OnSuccess on_success, OnFailure on_failure = {},
CallbackOptions options = CallbackOptions::Defaults()) const {
auto next = ContinuedFuture::Make();
AddCallback(OnComplete{std::forward<OnSuccess>(on_success),
std::forward<OnFailure>(on_failure), next},
options);
return next;
}
/// \brief Implicit constructor to create a finished future from a value
Future(ValueType val) : Future() { // NOLINT runtime/explicit
impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
SetResult(std::move(val));
}
/// \brief Implicit constructor to create a future from a Result, enabling use
/// of macros like ARROW_ASSIGN_OR_RAISE.
Future(Result<ValueType> res) : Future() { // NOLINT runtime/explicit
if (ARROW_PREDICT_TRUE(res.ok())) {
impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
} else {
impl_ = FutureImpl::MakeFinished(FutureState::FAILURE);
}
SetResult(std::move(res));
}
/// \brief Implicit constructor to create a future from a Status, enabling use
/// of macros like ARROW_RETURN_NOT_OK.
Future(Status s) // NOLINT runtime/explicit
: Future(Result<ValueType>(std::move(s))) {}
protected:
void InitializeFromResult(Result<ValueType> res) {
if (ARROW_PREDICT_TRUE(res.ok())) {
impl_ = FutureImpl::MakeFinished(FutureState::SUCCESS);
} else {
impl_ = FutureImpl::MakeFinished(FutureState::FAILURE);
}
SetResult(std::move(res));
}
void Initialize() { impl_ = FutureImpl::Make(); }
Result<ValueType>* GetResult() const { return impl_->CastResult<ValueType>(); }
void SetResult(Result<ValueType> res) {
impl_->result_ = {new Result<ValueType>(std::move(res)),
[](void* p) { delete static_cast<Result<ValueType>*>(p); }};
}
void DoMarkFinished(Result<ValueType> res) {
SetResult(std::move(res));
if (ARROW_PREDICT_TRUE(GetResult()->ok())) {
impl_->MarkFinished();
} else {
impl_->MarkFailed();
}
}
void CheckValid() const {
#ifndef NDEBUG
if (!is_valid()) {
Status::Invalid("Invalid Future (default-initialized?)").Abort();
}
#endif
}
explicit Future(std::shared_ptr<FutureImpl> impl) : impl_(std::move(impl)) {}
std::shared_ptr<FutureImpl> impl_;
friend struct detail::ContinueFuture;
template <typename U>
friend class Future;
friend class WeakFuture<T>;
FRIEND_TEST(FutureRefTest, ChainRemoved);
FRIEND_TEST(FutureRefTest, TailRemoved);
FRIEND_TEST(FutureRefTest, HeadRemoved);
};
template <typename T>
typename Future<T>::SyncType FutureToSync(const Future<T>& fut) {
return fut.result();
}
template <>
inline typename Future<internal::Empty>::SyncType FutureToSync<internal::Empty>(
const Future<internal::Empty>& fut) {
return fut.status();
}
template <>
inline Future<>::Future(Status s) : Future(internal::Empty::ToResult(std::move(s))) {}
template <typename T>
class WeakFuture {
public:
explicit WeakFuture(const Future<T>& future) : impl_(future.impl_) {}
Future<T> get() { return Future<T>{impl_.lock()}; }
private:
std::weak_ptr<FutureImpl> impl_;
};
/// \defgroup future-utilities Functions for working with Futures
/// @{
/// If a Result<Future> holds an error instead of a Future, construct a finished Future
/// holding that error.
template <typename T>
static Future<T> DeferNotOk(Result<Future<T>> maybe_future) {
if (ARROW_PREDICT_FALSE(!maybe_future.ok())) {
return Future<T>::MakeFinished(std::move(maybe_future).status());
}
return std::move(maybe_future).MoveValueUnsafe();
}
/// \brief Create a Future which completes when all of `futures` complete.
///
/// The future's result is a vector of the results of `futures`.
/// Note that this future will never be marked "failed"; failed results
/// will be stored in the result vector alongside successful results.
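/// A minimal usage sketch (illustrative):
///
///   std::vector<Future<int>> futures{Future<int>::MakeFinished(1),
///                                    Future<int>::MakeFinished(2)};
///   Future<> done = All(futures).Then([](const std::vector<Result<int>>& results) {
///     // results[0] and results[1] each hold either a value or an error
///   });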
template <typename T>
Future<std::vector<Result<T>>> All(std::vector<Future<T>> futures) {
struct State {
explicit State(std::vector<Future<T>> f)
: futures(std::move(f)), n_remaining(futures.size()) {}
std::vector<Future<T>> futures;
std::atomic<size_t> n_remaining;
};
if (futures.size() == 0) {
return {std::vector<Result<T>>{}};
}
auto state = std::make_shared<State>(std::move(futures));
auto out = Future<std::vector<Result<T>>>::Make();
for (const Future<T>& future : state->futures) {
future.AddCallback([state, out](const Result<T>&) mutable {
if (state->n_remaining.fetch_sub(1) != 1) return;
std::vector<Result<T>> results(state->futures.size());
for (size_t i = 0; i < results.size(); ++i) {
results[i] = state->futures[i].result();
}
out.MarkFinished(std::move(results));
});
}
return out;
}
/// \brief Create a Future which completes when all of `futures` complete.
///
/// The future will be marked complete if all `futures` complete
/// successfully. Otherwise, it will be marked failed with the status of
/// the first failing future.
ARROW_EXPORT
Future<> AllComplete(const std::vector<Future<>>& futures);
/// \brief Create a Future which completes when all of `futures` complete.
///
/// The future will finish with an ok status if all `futures` finish with
/// an ok status. Otherwise, it will be marked failed with the status of
/// one of the failing futures.
///
/// Unlike AllComplete this Future will not complete immediately when a
/// failure occurs. It will wait until all futures have finished.
ARROW_EXPORT
Future<> AllFinished(const std::vector<Future<>>& futures);
/// @}
struct Continue {
template <typename T>
operator std::optional<T>() && { // NOLINT explicit
return {};
}
};
template <typename T = internal::Empty>
std::optional<T> Break(T break_value = {}) {
return std::optional<T>{std::move(break_value)};
}
template <typename T = internal::Empty>
using ControlFlow = std::optional<T>;
/// \brief Loop through an asynchronous sequence
///
/// \param[in] iterate A generator of Future<ControlFlow<BreakValue>>. On completion
/// of each yielded future the resulting ControlFlow will be examined. A Break will
/// terminate the loop, while a Continue will re-invoke `iterate`.
///
/// \return A future which will complete when a Future returned by iterate completes with
/// a Break
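/// A minimal usage sketch (illustrative; `source->ReadNextLine()` is a hypothetical
/// asynchronous call returning Future<std::string>):
///
///   Future<std::string> last = Loop([source] {
///     return source->ReadNextLine().Then(
///         [](const std::string& line) -> ControlFlow<std::string> {
///           if (line.empty()) return Break(line);  // finish the loop
///           return Continue();                     // otherwise iterate again
///         });
///   });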
template <typename Iterate,
typename Control = typename detail::result_of_t<Iterate()>::ValueType,
typename BreakValueType = typename Control::value_type>
Future<BreakValueType> Loop(Iterate iterate) {
struct Callback {
bool CheckForTermination(const Result<Control>& control_res) {
if (!control_res.ok()) {
break_fut.MarkFinished(control_res.status());
return true;
}
if (control_res->has_value()) {
break_fut.MarkFinished(**control_res);
return true;
}
return false;
}
void operator()(const Result<Control>& maybe_control) && {
if (CheckForTermination(maybe_control)) return;
auto control_fut = iterate();
while (true) {
if (control_fut.TryAddCallback([this]() { return *this; })) {
// Adding a callback succeeded; control_fut was not finished
// and we must wait to CheckForTermination.
return;
}
// Adding a callback failed; control_fut was finished and we
// can CheckForTermination immediately. This also avoids recursion and potential
// stack overflow.
if (CheckForTermination(control_fut.result())) return;
control_fut = iterate();
}
}
Iterate iterate;
// If a future returned by iterate() is never completed then we will be hanging on
// to break_fut forever even if the listener has given up listening on it. Instead we
// rely on the fact that a producer (the caller of Future<>::Make) is always
// responsible for completing the futures they create.
// TODO: Could avoid this kind of situation with "future abandonment" similar to mesos
Future<BreakValueType> break_fut;
};
auto break_fut = Future<BreakValueType>::Make();
auto control_fut = iterate();
control_fut.AddCallback(Callback{std::move(iterate), break_fut});
return break_fut;
}
inline Future<> ToFuture(Status status) {
return Future<>::MakeFinished(std::move(status));
}
template <typename T>
Future<T> ToFuture(T value) {
return Future<T>::MakeFinished(std::move(value));
}
template <typename T>
Future<T> ToFuture(Result<T> maybe_value) {
return Future<T>::MakeFinished(std::move(maybe_value));
}
template <typename T>
Future<T> ToFuture(Future<T> fut) {
return std::move(fut);
}
template <typename T>
struct EnsureFuture {
using type = decltype(ToFuture(std::declval<T>()));
};
} // namespace arrow

View File

@@ -0,0 +1,66 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <functional>
namespace arrow {
namespace internal {
// ----------------------------------------------------------------------
// BEGIN Hash utilities from Boost
namespace detail {
#if defined(_MSC_VER)
#define ARROW_HASH_ROTL32(x, r) _rotl(x, r)
#else
#define ARROW_HASH_ROTL32(x, r) (x << r) | (x >> (32 - r))
#endif
template <typename SizeT>
inline void hash_combine_impl(SizeT& seed, SizeT value) {
seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
inline void hash_combine_impl(uint32_t& h1, uint32_t k1) {
const uint32_t c1 = 0xcc9e2d51;
const uint32_t c2 = 0x1b873593;
k1 *= c1;
k1 = ARROW_HASH_ROTL32(k1, 15);
k1 *= c2;
h1 ^= k1;
h1 = ARROW_HASH_ROTL32(h1, 13);
h1 = h1 * 5 + 0xe6546b64;
}
#undef ARROW_HASH_ROTL32
} // namespace detail
template <class T>
inline void hash_combine(std::size_t& seed, T const& v) {
std::hash<T> hasher;
return ::arrow::internal::detail::hash_combine_impl(seed, hasher(v));
}
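// A minimal usage sketch (illustrative):
//
//   std::size_t seed = 0;
//   hash_combine(seed, int64_t{42});
//   hash_combine(seed, std::string("key"));
//   // `seed` now mixes both hashes and can serve as a composite hash value.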
// END Hash utilities from Boost
// ----------------------------------------------------------------------
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,927 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Private header, not to be exported
#pragma once
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#include "arrow/array/builder_binary.h"
#include "arrow/buffer_builder.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_builders.h"
#include "arrow/util/endian.h"
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/ubsan.h"
#define XXH_INLINE_ALL
#include "arrow/vendored/xxhash.h" // IWYU pragma: keep
namespace arrow {
namespace internal {
// XXX would it help to have a 32-bit hash value on large datasets?
typedef uint64_t hash_t;
// Notes about the choice of a hash function.
// - XXH3 is extremely fast on most data sizes, from small to huge;
// faster even than HW CRC-based hashing schemes
// - our custom hash function for tiny values (< 16 bytes) is still
// significantly faster (~30%), at least on this machine and compiler
template <uint64_t AlgNum>
inline hash_t ComputeStringHash(const void* data, int64_t length);
template <typename Scalar, uint64_t AlgNum>
struct ScalarHelperBase {
static bool CompareScalars(Scalar u, Scalar v) { return u == v; }
static hash_t ComputeHash(const Scalar& value) {
// Generic hash computation for scalars. Simply apply the string hash
// to the bit representation of the value.
// XXX in the case of FP values, we'd like equal values to have the same hash,
// even if they have different bit representations...
return ComputeStringHash<AlgNum>(&value, sizeof(value));
}
};
template <typename Scalar, uint64_t AlgNum = 0, typename Enable = void>
struct ScalarHelper : public ScalarHelperBase<Scalar, AlgNum> {};
template <typename Scalar, uint64_t AlgNum>
struct ScalarHelper<Scalar, AlgNum, enable_if_t<std::is_integral<Scalar>::value>>
: public ScalarHelperBase<Scalar, AlgNum> {
// ScalarHelper specialization for integers
static hash_t ComputeHash(const Scalar& value) {
// Faster hash computation for integers.
// Two of xxhash's prime multipliers (which are chosen for their
// bit dispersion properties)
static constexpr uint64_t multipliers[] = {11400714785074694791ULL,
14029467366897019727ULL};
// Multiplying by the prime number mixes the low bits into the high bits,
// then byte-swapping (which is a single CPU instruction) allows the
// combined high and low bits to participate in the initial hash table index.
auto h = static_cast<hash_t>(value);
return bit_util::ByteSwap(multipliers[AlgNum] * h);
}
};
template <typename Scalar, uint64_t AlgNum>
struct ScalarHelper<Scalar, AlgNum,
enable_if_t<std::is_same<std::string_view, Scalar>::value>>
: public ScalarHelperBase<Scalar, AlgNum> {
// ScalarHelper specialization for std::string_view
static hash_t ComputeHash(const std::string_view& value) {
return ComputeStringHash<AlgNum>(value.data(), static_cast<int64_t>(value.size()));
}
};
template <typename Scalar, uint64_t AlgNum>
struct ScalarHelper<Scalar, AlgNum, enable_if_t<std::is_floating_point<Scalar>::value>>
: public ScalarHelperBase<Scalar, AlgNum> {
// ScalarHelper specialization for reals
static bool CompareScalars(Scalar u, Scalar v) {
if (std::isnan(u)) {
// XXX should we do a bit-precise comparison?
return std::isnan(v);
}
return u == v;
}
};
template <uint64_t AlgNum = 0>
hash_t ComputeStringHash(const void* data, int64_t length) {
if (ARROW_PREDICT_TRUE(length <= 16)) {
// Specialize for small hash strings, as they are quite common as
// hash table keys. Even XXH3 isn't quite as fast.
auto p = reinterpret_cast<const uint8_t*>(data);
auto n = static_cast<uint32_t>(length);
if (n <= 8) {
if (n <= 3) {
if (n == 0) {
return 1U;
}
uint32_t x = (n << 24) ^ (p[0] << 16) ^ (p[n / 2] << 8) ^ p[n - 1];
return ScalarHelper<uint32_t, AlgNum>::ComputeHash(x);
}
// 4 <= length <= 8
// We can read the string as two overlapping 32-bit ints, apply
// different hash functions to each of them in parallel, then XOR
// the results
uint32_t x, y;
hash_t hx, hy;
x = util::SafeLoadAs<uint32_t>(p + n - 4);
y = util::SafeLoadAs<uint32_t>(p);
hx = ScalarHelper<uint32_t, AlgNum>::ComputeHash(x);
hy = ScalarHelper<uint32_t, AlgNum ^ 1>::ComputeHash(y);
return n ^ hx ^ hy;
}
// 8 <= length <= 16
// Apply the same principle as above
uint64_t x, y;
hash_t hx, hy;
x = util::SafeLoadAs<uint64_t>(p + n - 8);
y = util::SafeLoadAs<uint64_t>(p);
hx = ScalarHelper<uint64_t, AlgNum>::ComputeHash(x);
hy = ScalarHelper<uint64_t, AlgNum ^ 1>::ComputeHash(y);
return n ^ hx ^ hy;
}
#if XXH3_SECRET_SIZE_MIN != 136
#error XXH3_SECRET_SIZE_MIN changed, please fix kXxh3Secrets
#endif
// XXH3_64bits_withSeed generates a secret based on the seed, which is too slow.
// Instead, we use hard-coded random secrets. To maximize cache efficiency,
// they reuse the same memory area.
static constexpr unsigned char kXxh3Secrets[XXH3_SECRET_SIZE_MIN + 1] = {
0xe7, 0x8b, 0x13, 0xf9, 0xfc, 0xb5, 0x8e, 0xef, 0x81, 0x48, 0x2c, 0xbf, 0xf9, 0x9f,
0xc1, 0x1e, 0x43, 0x6d, 0xbf, 0xa6, 0x6d, 0xb5, 0x72, 0xbc, 0x97, 0xd8, 0x61, 0x24,
0x0f, 0x12, 0xe3, 0x05, 0x21, 0xf7, 0x5c, 0x66, 0x67, 0xa5, 0x65, 0x03, 0x96, 0x26,
0x69, 0xd8, 0x29, 0x20, 0xf8, 0xc7, 0xb0, 0x3d, 0xdd, 0x7d, 0x18, 0xa0, 0x60, 0x75,
0x92, 0xa4, 0xce, 0xba, 0xc0, 0x77, 0xf4, 0xac, 0xb7, 0x03, 0x53, 0xf0, 0x98, 0xce,
0xe6, 0x2b, 0x20, 0xc7, 0x82, 0x91, 0xab, 0xbf, 0x68, 0x5c, 0x62, 0x4d, 0x33, 0xa3,
0xe1, 0xb3, 0xff, 0x97, 0x54, 0x4c, 0x44, 0x34, 0xb5, 0xb9, 0x32, 0x4c, 0x75, 0x42,
0x89, 0x53, 0x94, 0xd4, 0x9f, 0x2b, 0x76, 0x4d, 0x4e, 0xe6, 0xfa, 0x15, 0x3e, 0xc1,
0xdb, 0x71, 0x4b, 0x2c, 0x94, 0xf5, 0xfc, 0x8c, 0x89, 0x4b, 0xfb, 0xc1, 0x82, 0xa5,
0x6a, 0x53, 0xf9, 0x4a, 0xba, 0xce, 0x1f, 0xc0, 0x97, 0x1a, 0x87};
static_assert(AlgNum < 2, "AlgNum too large");
static constexpr auto secret = kXxh3Secrets + AlgNum;
return XXH3_64bits_withSecret(data, static_cast<size_t>(length), secret,
XXH3_SECRET_SIZE_MIN);
}
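// For example (illustrative): ComputeStringHash<0>("abc", 3) hashes a 3-byte key
// through the small-string path above; longer inputs fall through to XXH3.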
// XXX add a HashEq<ArrowType> struct with both hash and compare functions?
// ----------------------------------------------------------------------
// An open-addressing insert-only hash table (no deletes)
template <typename Payload>
class HashTable {
public:
static constexpr hash_t kSentinel = 0ULL;
static constexpr int64_t kLoadFactor = 2UL;
struct Entry {
hash_t h;
Payload payload;
// An entry is valid if the hash is different from the sentinel value
operator bool() const { return h != kSentinel; }
};
HashTable(MemoryPool* pool, uint64_t capacity) : entries_builder_(pool) {
DCHECK_NE(pool, nullptr);
// Minimum of 32 elements
capacity = std::max<uint64_t>(capacity, 32UL);
capacity_ = bit_util::NextPower2(capacity);
capacity_mask_ = capacity_ - 1;
size_ = 0;
DCHECK_OK(UpsizeBuffer(capacity_));
}
// Lookup with non-linear probing
// cmp_func should have signature bool(const Payload*).
// Return a (Entry*, found) pair.
template <typename CmpFunc>
std::pair<Entry*, bool> Lookup(hash_t h, CmpFunc&& cmp_func) {
auto p = Lookup<DoCompare, CmpFunc>(h, entries_, capacity_mask_,
std::forward<CmpFunc>(cmp_func));
return {&entries_[p.first], p.second};
}
template <typename CmpFunc>
std::pair<const Entry*, bool> Lookup(hash_t h, CmpFunc&& cmp_func) const {
auto p = Lookup<DoCompare, CmpFunc>(h, entries_, capacity_mask_,
std::forward<CmpFunc>(cmp_func));
return {&entries_[p.first], p.second};
}
Status Insert(Entry* entry, hash_t h, const Payload& payload) {
// Ensure entry is empty before inserting
assert(!*entry);
entry->h = FixHash(h);
entry->payload = payload;
++size_;
if (ARROW_PREDICT_FALSE(NeedUpsizing())) {
// Resize less frequently since it is expensive
return Upsize(capacity_ * kLoadFactor * 2);
}
return Status::OK();
}
uint64_t size() const { return size_; }
// Visit all non-empty entries in the table
// The visit_func should have signature void(const Entry*)
template <typename VisitFunc>
void VisitEntries(VisitFunc&& visit_func) const {
for (uint64_t i = 0; i < capacity_; i++) {
const auto& entry = entries_[i];
if (entry) {
visit_func(&entry);
}
}
}
protected:
// NoCompare is for when the value is known not to exist in the table
enum CompareKind { DoCompare, NoCompare };
// The workhorse lookup function
template <CompareKind CKind, typename CmpFunc>
std::pair<uint64_t, bool> Lookup(hash_t h, const Entry* entries, uint64_t size_mask,
CmpFunc&& cmp_func) const {
static constexpr uint8_t perturb_shift = 5;
uint64_t index, perturb;
const Entry* entry;
h = FixHash(h);
index = h & size_mask;
perturb = (h >> perturb_shift) + 1U;
while (true) {
entry = &entries[index];
if (CompareEntry<CKind, CmpFunc>(h, entry, std::forward<CmpFunc>(cmp_func))) {
// Found
return {index, true};
}
if (entry->h == kSentinel) {
// Empty slot
return {index, false};
}
// Perturbation logic inspired from CPython's set / dict object.
// The goal is that all 64 bits of the unmasked hash value eventually
// participate in the probing sequence, to minimize clustering.
index = (index + perturb) & size_mask;
perturb = (perturb >> perturb_shift) + 1U;
}
}
template <CompareKind CKind, typename CmpFunc>
bool CompareEntry(hash_t h, const Entry* entry, CmpFunc&& cmp_func) const {
if (CKind == NoCompare) {
return false;
} else {
return entry->h == h && cmp_func(&entry->payload);
}
}
bool NeedUpsizing() const {
// Keep the load factor <= 1/2
return size_ * kLoadFactor >= capacity_;
}
Status UpsizeBuffer(uint64_t capacity) {
RETURN_NOT_OK(entries_builder_.Resize(capacity));
entries_ = entries_builder_.mutable_data();
memset(static_cast<void*>(entries_), 0, capacity * sizeof(Entry));
return Status::OK();
}
Status Upsize(uint64_t new_capacity) {
assert(new_capacity > capacity_);
uint64_t new_mask = new_capacity - 1;
assert((new_capacity & new_mask) == 0); // it's a power of two
// Stash old entries and seal builder, effectively resetting the Buffer
const Entry* old_entries = entries_;
ARROW_ASSIGN_OR_RAISE(auto previous, entries_builder_.FinishWithLength(capacity_));
// Allocate new buffer
RETURN_NOT_OK(UpsizeBuffer(new_capacity));
for (uint64_t i = 0; i < capacity_; i++) {
const auto& entry = old_entries[i];
if (entry) {
// Dummy compare function will not be called
auto p = Lookup<NoCompare>(entry.h, entries_, new_mask,
[](const Payload*) { return false; });
// Lookup<NoCompare> (and CompareEntry<NoCompare>) ensure that an
// empty slot is always returned
assert(!p.second);
entries_[p.first] = entry;
}
}
capacity_ = new_capacity;
capacity_mask_ = new_mask;
return Status::OK();
}
hash_t FixHash(hash_t h) const { return (h == kSentinel) ? 42U : h; }
// The number of slots available in the hash table array.
uint64_t capacity_;
uint64_t capacity_mask_;
// The number of used slots in the hash table array.
uint64_t size_;
Entry* entries_;
TypedBufferBuilder<Entry> entries_builder_;
};
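// A minimal usage sketch (illustrative; `IntPayload` is a hypothetical payload type
// with an `int64_t value` member):
//
//   HashTable<IntPayload> table(default_memory_pool(), /*capacity=*/32);
//   const int64_t key = 42;
//   const hash_t h = ScalarHelper<int64_t, 0>::ComputeHash(key);
//   auto p = table.Lookup(h, [&](const IntPayload* payload) { return payload->value == key; });
//   if (!p.second) {
//     RETURN_NOT_OK(table.Insert(p.first, h, IntPayload{key}));
//   }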
// XXX typedef memo_index_t int32_t ?
constexpr int32_t kKeyNotFound = -1;
// ----------------------------------------------------------------------
// A base class for memoization table.
class MemoTable {
public:
virtual ~MemoTable() = default;
virtual int32_t size() const = 0;
};
// ----------------------------------------------------------------------
// A memoization table for memory-cheap scalar values.
// The memoization table remembers and allows looking up the insertion
// index for each key.
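// A minimal usage sketch (illustrative):
//
//   ScalarMemoTable<int64_t> memo(default_memory_pool());
//   int32_t index = -1;
//   RETURN_NOT_OK(memo.GetOrInsert(int64_t{42}, &index));  // first occurrence -> index 0
//   RETURN_NOT_OK(memo.GetOrInsert(int64_t{42}, &index));  // already present  -> index 0
//   RETURN_NOT_OK(memo.GetOrInsert(int64_t{7}, &index));   // new value        -> index 1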
template <typename Scalar, template <class> class HashTableTemplateType = HashTable>
class ScalarMemoTable : public MemoTable {
public:
explicit ScalarMemoTable(MemoryPool* pool, int64_t entries = 0)
: hash_table_(pool, static_cast<uint64_t>(entries)) {}
int32_t Get(const Scalar& value) const {
auto cmp_func = [value](const Payload* payload) -> bool {
return ScalarHelper<Scalar, 0>::CompareScalars(payload->value, value);
};
hash_t h = ComputeHash(value);
auto p = hash_table_.Lookup(h, cmp_func);
if (p.second) {
return p.first->payload.memo_index;
} else {
return kKeyNotFound;
}
}
template <typename Func1, typename Func2>
Status GetOrInsert(const Scalar& value, Func1&& on_found, Func2&& on_not_found,
int32_t* out_memo_index) {
auto cmp_func = [value](const Payload* payload) -> bool {
return ScalarHelper<Scalar, 0>::CompareScalars(value, payload->value);
};
hash_t h = ComputeHash(value);
auto p = hash_table_.Lookup(h, cmp_func);
int32_t memo_index;
if (p.second) {
memo_index = p.first->payload.memo_index;
on_found(memo_index);
} else {
memo_index = size();
RETURN_NOT_OK(hash_table_.Insert(p.first, h, {value, memo_index}));
on_not_found(memo_index);
}
*out_memo_index = memo_index;
return Status::OK();
}
Status GetOrInsert(const Scalar& value, int32_t* out_memo_index) {
return GetOrInsert(
value, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
}
int32_t GetNull() const { return null_index_; }
template <typename Func1, typename Func2>
int32_t GetOrInsertNull(Func1&& on_found, Func2&& on_not_found) {
int32_t memo_index = GetNull();
if (memo_index != kKeyNotFound) {
on_found(memo_index);
} else {
null_index_ = memo_index = size();
on_not_found(memo_index);
}
return memo_index;
}
int32_t GetOrInsertNull() {
return GetOrInsertNull([](int32_t i) {}, [](int32_t i) {});
}
// The number of entries in the memo table, plus 1 if null was added
// (which is also 1 + the largest memo index)
int32_t size() const override {
return static_cast<int32_t>(hash_table_.size()) + (GetNull() != kKeyNotFound);
}
// Copy values starting from index `start` into `out_data`
void CopyValues(int32_t start, Scalar* out_data) const {
hash_table_.VisitEntries([=](const HashTableEntry* entry) {
int32_t index = entry->payload.memo_index - start;
if (index >= 0) {
out_data[index] = entry->payload.value;
}
});
// Zero-initialize the null entry
if (null_index_ != kKeyNotFound) {
int32_t index = null_index_ - start;
if (index >= 0) {
out_data[index] = Scalar{};
}
}
}
void CopyValues(Scalar* out_data) const { CopyValues(0, out_data); }
protected:
struct Payload {
Scalar value;
int32_t memo_index;
};
using HashTableType = HashTableTemplateType<Payload>;
using HashTableEntry = typename HashTableType::Entry;
HashTableType hash_table_;
int32_t null_index_ = kKeyNotFound;
hash_t ComputeHash(const Scalar& value) const {
return ScalarHelper<Scalar, 0>::ComputeHash(value);
}
public:
// defined here so that `HashTableType` is visible
// Merge entries from `other_table` into `this->hash_table_`.
Status MergeTable(const ScalarMemoTable& other_table) {
const HashTableType& other_hashtable = other_table.hash_table_;
other_hashtable.VisitEntries([this](const HashTableEntry* other_entry) {
int32_t unused;
DCHECK_OK(this->GetOrInsert(other_entry->payload.value, &unused));
});
// TODO: ARROW-17074 - implement proper error handling
return Status::OK();
}
};
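// Illustrative usage sketch (not part of the upstream header): memoizing a
// stream of int64 values with ScalarMemoTable. `default_memory_pool()` and
// ARROW_RETURN_NOT_OK are assumed to be available in the calling code.
//
//   ScalarMemoTable<int64_t> table(default_memory_pool());
//   int32_t index;
//   ARROW_RETURN_NOT_OK(table.GetOrInsert(int64_t{42}, &index));  // index == 0
//   ARROW_RETURN_NOT_OK(table.GetOrInsert(int64_t{7}, &index));   // index == 1
//   ARROW_RETURN_NOT_OK(table.GetOrInsert(int64_t{42}, &index));  // index == 0 again
//   // table.Get(int64_t{8}) == kKeyNotFound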
// ----------------------------------------------------------------------
// A memoization table for small scalar values, using direct indexing
template <typename Scalar, typename Enable = void>
struct SmallScalarTraits {};
template <>
struct SmallScalarTraits<bool> {
static constexpr int32_t cardinality = 2;
static uint32_t AsIndex(bool value) { return value ? 1 : 0; }
};
template <typename Scalar>
struct SmallScalarTraits<Scalar, enable_if_t<std::is_integral<Scalar>::value>> {
using Unsigned = typename std::make_unsigned<Scalar>::type;
static constexpr int32_t cardinality = 1U + std::numeric_limits<Unsigned>::max();
static uint32_t AsIndex(Scalar value) { return static_cast<Unsigned>(value); }
};
template <typename Scalar, template <class> class HashTableTemplateType = HashTable>
class SmallScalarMemoTable : public MemoTable {
public:
explicit SmallScalarMemoTable(MemoryPool* pool, int64_t entries = 0) {
std::fill(value_to_index_, value_to_index_ + cardinality + 1, kKeyNotFound);
index_to_value_.reserve(cardinality);
}
int32_t Get(const Scalar value) const {
auto value_index = AsIndex(value);
return value_to_index_[value_index];
}
template <typename Func1, typename Func2>
Status GetOrInsert(const Scalar value, Func1&& on_found, Func2&& on_not_found,
int32_t* out_memo_index) {
auto value_index = AsIndex(value);
auto memo_index = value_to_index_[value_index];
if (memo_index == kKeyNotFound) {
memo_index = static_cast<int32_t>(index_to_value_.size());
index_to_value_.push_back(value);
value_to_index_[value_index] = memo_index;
DCHECK_LT(memo_index, cardinality + 1);
on_not_found(memo_index);
} else {
on_found(memo_index);
}
*out_memo_index = memo_index;
return Status::OK();
}
Status GetOrInsert(const Scalar value, int32_t* out_memo_index) {
return GetOrInsert(
value, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
}
int32_t GetNull() const { return value_to_index_[cardinality]; }
template <typename Func1, typename Func2>
int32_t GetOrInsertNull(Func1&& on_found, Func2&& on_not_found) {
auto memo_index = GetNull();
if (memo_index == kKeyNotFound) {
memo_index = value_to_index_[cardinality] = size();
index_to_value_.push_back(0);
on_not_found(memo_index);
} else {
on_found(memo_index);
}
return memo_index;
}
int32_t GetOrInsertNull() {
return GetOrInsertNull([](int32_t i) {}, [](int32_t i) {});
}
// The number of entries in the memo table
// (which is also 1 + the largest memo index)
int32_t size() const override { return static_cast<int32_t>(index_to_value_.size()); }
// Merge entries from `other_table` into `this`.
Status MergeTable(const SmallScalarMemoTable& other_table) {
for (const Scalar& other_val : other_table.index_to_value_) {
int32_t unused;
RETURN_NOT_OK(this->GetOrInsert(other_val, &unused));
}
return Status::OK();
}
// Copy values starting from index `start` into `out_data`
void CopyValues(int32_t start, Scalar* out_data) const {
DCHECK_GE(start, 0);
DCHECK_LE(static_cast<size_t>(start), index_to_value_.size());
int64_t offset = start * static_cast<int32_t>(sizeof(Scalar));
memcpy(out_data, index_to_value_.data() + offset, (size() - start) * sizeof(Scalar));
}
void CopyValues(Scalar* out_data) const { CopyValues(0, out_data); }
const std::vector<Scalar>& values() const { return index_to_value_; }
protected:
static constexpr auto cardinality = SmallScalarTraits<Scalar>::cardinality;
static_assert(cardinality <= 256, "cardinality too large for direct-addressed table");
uint32_t AsIndex(Scalar value) const {
return SmallScalarTraits<Scalar>::AsIndex(value);
}
// The last index is reserved for the null element.
int32_t value_to_index_[cardinality + 1];
std::vector<Scalar> index_to_value_;
};
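// Illustrative sketch (assumed caller code, not part of the upstream header):
// the direct-indexed table never needs rehashing, so a lookup is a single
// array access.
//
//   SmallScalarMemoTable<uint8_t> table(default_memory_pool());
//   int32_t index;
//   ARROW_RETURN_NOT_OK(table.GetOrInsert(uint8_t{3}, &index));  // index == 0
//   ARROW_RETURN_NOT_OK(table.GetOrInsert(uint8_t{3}, &index));  // index == 0 (memoized)
//   // table.values() == {3}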
// ----------------------------------------------------------------------
// A memoization table for variable-sized binary data.
template <typename BinaryBuilderT>
class BinaryMemoTable : public MemoTable {
public:
using builder_offset_type = typename BinaryBuilderT::offset_type;
explicit BinaryMemoTable(MemoryPool* pool, int64_t entries = 0,
int64_t values_size = -1)
: hash_table_(pool, static_cast<uint64_t>(entries)), binary_builder_(pool) {
const int64_t data_size = (values_size < 0) ? entries * 4 : values_size;
DCHECK_OK(binary_builder_.Resize(entries));
DCHECK_OK(binary_builder_.ReserveData(data_size));
}
int32_t Get(const void* data, builder_offset_type length) const {
hash_t h = ComputeStringHash<0>(data, length);
auto p = Lookup(h, data, length);
if (p.second) {
return p.first->payload.memo_index;
} else {
return kKeyNotFound;
}
}
int32_t Get(const std::string_view& value) const {
return Get(value.data(), static_cast<builder_offset_type>(value.length()));
}
template <typename Func1, typename Func2>
Status GetOrInsert(const void* data, builder_offset_type length, Func1&& on_found,
Func2&& on_not_found, int32_t* out_memo_index) {
hash_t h = ComputeStringHash<0>(data, length);
auto p = Lookup(h, data, length);
int32_t memo_index;
if (p.second) {
memo_index = p.first->payload.memo_index;
on_found(memo_index);
} else {
memo_index = size();
// Insert string value
RETURN_NOT_OK(binary_builder_.Append(static_cast<const char*>(data), length));
// Insert hash entry
RETURN_NOT_OK(
hash_table_.Insert(const_cast<HashTableEntry*>(p.first), h, {memo_index}));
on_not_found(memo_index);
}
*out_memo_index = memo_index;
return Status::OK();
}
template <typename Func1, typename Func2>
Status GetOrInsert(const std::string_view& value, Func1&& on_found,
Func2&& on_not_found, int32_t* out_memo_index) {
return GetOrInsert(value.data(), static_cast<builder_offset_type>(value.length()),
std::forward<Func1>(on_found), std::forward<Func2>(on_not_found),
out_memo_index);
}
Status GetOrInsert(const void* data, builder_offset_type length,
int32_t* out_memo_index) {
return GetOrInsert(
data, length, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
}
Status GetOrInsert(const std::string_view& value, int32_t* out_memo_index) {
return GetOrInsert(value.data(), static_cast<builder_offset_type>(value.length()),
out_memo_index);
}
int32_t GetNull() const { return null_index_; }
template <typename Func1, typename Func2>
int32_t GetOrInsertNull(Func1&& on_found, Func2&& on_not_found) {
int32_t memo_index = GetNull();
if (memo_index == kKeyNotFound) {
memo_index = null_index_ = size();
DCHECK_OK(binary_builder_.AppendNull());
on_not_found(memo_index);
} else {
on_found(memo_index);
}
return memo_index;
}
int32_t GetOrInsertNull() {
return GetOrInsertNull([](int32_t i) {}, [](int32_t i) {});
}
// The number of entries in the memo table
// (which is also 1 + the largest memo index)
int32_t size() const override {
return static_cast<int32_t>(hash_table_.size() + (GetNull() != kKeyNotFound));
}
int64_t values_size() const { return binary_builder_.value_data_length(); }
// Copy (n + 1) offsets starting from index `start` into `out_data`
template <class Offset>
void CopyOffsets(int32_t start, Offset* out_data) const {
DCHECK_LE(start, size());
const builder_offset_type* offsets = binary_builder_.offsets_data();
const builder_offset_type delta =
start < binary_builder_.length() ? offsets[start] : 0;
for (int32_t i = start; i < size(); ++i) {
const builder_offset_type adjusted_offset = offsets[i] - delta;
Offset cast_offset = static_cast<Offset>(adjusted_offset);
assert(static_cast<builder_offset_type>(cast_offset) ==
adjusted_offset); // avoid truncation
*out_data++ = cast_offset;
}
// Copy the last offset since BinaryBuilder only materializes it in Finish()
*out_data = static_cast<Offset>(binary_builder_.value_data_length() - delta);
}
template <class Offset>
void CopyOffsets(Offset* out_data) const {
CopyOffsets(0, out_data);
}
// Copy values starting from index `start` into `out_data`
void CopyValues(int32_t start, uint8_t* out_data) const {
CopyValues(start, -1, out_data);
}
// Same as above, but check output size in debug mode
void CopyValues(int32_t start, int64_t out_size, uint8_t* out_data) const {
DCHECK_LE(start, size());
// The absolute byte offset of `start` value in the binary buffer.
const builder_offset_type offset = binary_builder_.offset(start);
const auto length = binary_builder_.value_data_length() - static_cast<size_t>(offset);
if (out_size != -1) {
assert(static_cast<int64_t>(length) <= out_size);
}
auto view = binary_builder_.GetView(start);
memcpy(out_data, view.data(), length);
}
void CopyValues(uint8_t* out_data) const { CopyValues(0, -1, out_data); }
void CopyValues(int64_t out_size, uint8_t* out_data) const {
CopyValues(0, out_size, out_data);
}
void CopyFixedWidthValues(int32_t start, int32_t width_size, int64_t out_size,
uint8_t* out_data) const {
// This method exists to cope with the fact that the BinaryMemoTable does
// not know the fixed width when inserting the null value. The data
// buffer holds a zero-length string for the null value (if present).
//
// Thus, the method will properly inject an empty value of the proper width
// in the output buffer.
//
if (start >= size()) {
return;
}
int32_t null_index = GetNull();
if (null_index < start) {
// Nothing to skip, proceed as usual.
CopyValues(start, out_size, out_data);
return;
}
builder_offset_type left_offset = binary_builder_.offset(start);
// In debug mode, check that the stored data is exactly width_size bytes short
// of the expected output size (n_values * width_size), since the null value
// occupies no bytes in the data buffer.
#ifndef NDEBUG
int64_t data_length = values_size() - static_cast<size_t>(left_offset);
assert(data_length + width_size == out_size);
ARROW_UNUSED(data_length);
#endif
auto in_data = binary_builder_.value_data() + left_offset;
// The null value occupies 0 bytes in the data buffer, so copy the data in two
// parts and leave a width_size gap in out_data: [part_1][width_size][part_2]
auto null_data_offset = binary_builder_.offset(null_index);
auto left_size = null_data_offset - left_offset;
if (left_size > 0) {
// `in_data` already points at offset(start); adding `left_offset` again
// would double the offset whenever start > 0.
memcpy(out_data, in_data, left_size);
}
// Zero-initialize the null entry
memset(out_data + left_size, 0, width_size);
auto right_size = values_size() - static_cast<size_t>(null_data_offset);
if (right_size > 0) {
// skip the null fixed size value.
auto out_offset = left_size + width_size;
assert(out_data + out_offset + right_size == out_data + out_size);
// The source resumes right after the zero-length null value, i.e. at
// in_data + left_size (== value_data() + null_data_offset).
memcpy(out_data + out_offset, in_data + left_size, right_size);
}
}
// Visit the stored values in insertion order.
// The visitor function should have the signature `void(std::string_view)`
// or `void(const std::string_view&)`.
template <typename VisitFunc>
void VisitValues(int32_t start, VisitFunc&& visit) const {
for (int32_t i = start; i < size(); ++i) {
visit(binary_builder_.GetView(i));
}
}
protected:
struct Payload {
int32_t memo_index;
};
using HashTableType = HashTable<Payload>;
using HashTableEntry = typename HashTable<Payload>::Entry;
HashTableType hash_table_;
BinaryBuilderT binary_builder_;
int32_t null_index_ = kKeyNotFound;
std::pair<const HashTableEntry*, bool> Lookup(hash_t h, const void* data,
builder_offset_type length) const {
auto cmp_func = [&](const Payload* payload) {
std::string_view lhs = binary_builder_.GetView(payload->memo_index);
std::string_view rhs(static_cast<const char*>(data), length);
return lhs == rhs;
};
return hash_table_.Lookup(h, cmp_func);
}
public:
Status MergeTable(const BinaryMemoTable& other_table) {
other_table.VisitValues(0, [this](const std::string_view& other_value) {
int32_t unused;
DCHECK_OK(this->GetOrInsert(other_value, &unused));
});
return Status::OK();
}
};
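// Illustrative sketch (assumed caller code, not part of the upstream header):
// memoizing variable-length strings; values accumulate in the BinaryBuilder in
// insertion order.
//
//   BinaryMemoTable<BinaryBuilder> table(default_memory_pool());
//   int32_t index;
//   ARROW_RETURN_NOT_OK(table.GetOrInsert(std::string_view("foo"), &index));  // 0
//   ARROW_RETURN_NOT_OK(table.GetOrInsert(std::string_view("bar"), &index));  // 1
//   ARROW_RETURN_NOT_OK(table.GetOrInsert(std::string_view("foo"), &index));  // 0 again
//   // table.size() == 2, table.values_size() == 6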
template <typename T, typename Enable = void>
struct HashTraits {};
template <>
struct HashTraits<BooleanType> {
using MemoTableType = SmallScalarMemoTable<bool>;
};
template <typename T>
struct HashTraits<T, enable_if_8bit_int<T>> {
using c_type = typename T::c_type;
using MemoTableType = SmallScalarMemoTable<typename T::c_type>;
};
template <typename T>
struct HashTraits<T, enable_if_t<has_c_type<T>::value && !is_8bit_int<T>::value>> {
using c_type = typename T::c_type;
using MemoTableType = ScalarMemoTable<c_type, HashTable>;
};
template <typename T>
struct HashTraits<T, enable_if_t<has_string_view<T>::value &&
!std::is_base_of<LargeBinaryType, T>::value>> {
using MemoTableType = BinaryMemoTable<BinaryBuilder>;
};
template <typename T>
struct HashTraits<T, enable_if_decimal<T>> {
using MemoTableType = BinaryMemoTable<BinaryBuilder>;
};
template <typename T>
struct HashTraits<T, enable_if_t<std::is_base_of<LargeBinaryType, T>::value>> {
using MemoTableType = BinaryMemoTable<LargeBinaryBuilder>;
};
template <typename MemoTableType>
static inline Status ComputeNullBitmap(MemoryPool* pool, const MemoTableType& memo_table,
int64_t start_offset, int64_t* null_count,
std::shared_ptr<Buffer>* null_bitmap) {
int64_t dict_length = static_cast<int64_t>(memo_table.size()) - start_offset;
int64_t null_index = memo_table.GetNull();
*null_count = 0;
*null_bitmap = nullptr;
if (null_index != kKeyNotFound && null_index >= start_offset) {
null_index -= start_offset;
*null_count = 1;
ARROW_ASSIGN_OR_RAISE(*null_bitmap,
internal::BitmapAllButOne(pool, dict_length, null_index));
}
return Status::OK();
}
struct StringViewHash {
// std::hash compatible hasher for use with std::unordered_*
// (the std::hash specialization provided by nonstd constructs std::string
// temporaries then invokes std::hash<std::string> against those)
hash_t operator()(const std::string_view& value) const {
return ComputeStringHash<0>(value.data(), static_cast<int64_t>(value.size()));
}
};
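// Illustrative sketch (assumed caller code): StringViewHash can be dropped
// into standard unordered containers keyed by std::string_view without
// building std::string temporaries (<unordered_set> assumed included).
//
//   std::unordered_set<std::string_view, StringViewHash> seen;
//   seen.insert("foo");
//   bool found = seen.count("foo") > 0;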
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,137 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <type_traits>
#include "arrow/status.h"
#include "arrow/util/visibility.h"
namespace arrow {
class DataType;
struct ArraySpan;
struct Scalar;
namespace internal {
ARROW_EXPORT
uint8_t DetectUIntWidth(const uint64_t* values, int64_t length, uint8_t min_width = 1);
ARROW_EXPORT
uint8_t DetectUIntWidth(const uint64_t* values, const uint8_t* valid_bytes,
int64_t length, uint8_t min_width = 1);
ARROW_EXPORT
uint8_t DetectIntWidth(const int64_t* values, int64_t length, uint8_t min_width = 1);
ARROW_EXPORT
uint8_t DetectIntWidth(const int64_t* values, const uint8_t* valid_bytes, int64_t length,
uint8_t min_width = 1);
ARROW_EXPORT
void DowncastInts(const int64_t* source, int8_t* dest, int64_t length);
ARROW_EXPORT
void DowncastInts(const int64_t* source, int16_t* dest, int64_t length);
ARROW_EXPORT
void DowncastInts(const int64_t* source, int32_t* dest, int64_t length);
ARROW_EXPORT
void DowncastInts(const int64_t* source, int64_t* dest, int64_t length);
ARROW_EXPORT
void DowncastUInts(const uint64_t* source, uint8_t* dest, int64_t length);
ARROW_EXPORT
void DowncastUInts(const uint64_t* source, uint16_t* dest, int64_t length);
ARROW_EXPORT
void DowncastUInts(const uint64_t* source, uint32_t* dest, int64_t length);
ARROW_EXPORT
void DowncastUInts(const uint64_t* source, uint64_t* dest, int64_t length);
ARROW_EXPORT
void UpcastInts(const int32_t* source, int64_t* dest, int64_t length);
template <typename InputInt, typename OutputInt>
inline typename std::enable_if<(sizeof(InputInt) >= sizeof(OutputInt))>::type CastInts(
const InputInt* source, OutputInt* dest, int64_t length) {
DowncastInts(source, dest, length);
}
template <typename InputInt, typename OutputInt>
inline typename std::enable_if<(sizeof(InputInt) < sizeof(OutputInt))>::type CastInts(
const InputInt* source, OutputInt* dest, int64_t length) {
UpcastInts(source, dest, length);
}
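// Illustrative sketch (assumed caller code): detecting the minimal byte width
// of a batch of int64 values, then narrowing them in one pass.
//
//   const int64_t values[] = {1, -3, 127};
//   uint8_t width = DetectIntWidth(values, /*length=*/3);  // == 1, fits in int8
//   int8_t narrow[3];
//   DowncastInts(values, narrow, /*length=*/3);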
template <typename InputInt, typename OutputInt>
ARROW_EXPORT void TransposeInts(const InputInt* source, OutputInt* dest, int64_t length,
const int32_t* transpose_map);
ARROW_EXPORT
Status TransposeInts(const DataType& src_type, const DataType& dest_type,
const uint8_t* src, uint8_t* dest, int64_t src_offset,
int64_t dest_offset, int64_t length, const int32_t* transpose_map);
/// \brief Do vectorized boundschecking of integer-type array indices. The
/// indices must be nonnegative and strictly less than the passed upper
/// limit (which is usually the length of the array being indexed into).
ARROW_EXPORT
Status CheckIndexBounds(const ArraySpan& values, uint64_t upper_limit);
/// \brief Boundscheck integer values to determine if they are all between the
/// passed upper and lower limits (inclusive). Upper and lower bounds must be
/// the same type as the data and are not currently cast.
ARROW_EXPORT
Status CheckIntegersInRange(const ArraySpan& values, const Scalar& bound_lower,
const Scalar& bound_upper);
/// \brief Use CheckIntegersInRange to determine whether the passed integers
/// can fit safely in the passed integer type. This helps quickly determine if
/// integer narrowing (e.g. int64->int32) is safe to do.
ARROW_EXPORT
Status IntegersCanFit(const ArraySpan& values, const DataType& target_type);
/// \brief Convenience for boundschecking a single Scalar value
ARROW_EXPORT
Status IntegersCanFit(const Scalar& value, const DataType& target_type);
/// Upcast an integer to the largest possible width (currently 64 bits)
template <typename Integer>
typename std::enable_if<
std::is_integral<Integer>::value && std::is_signed<Integer>::value, int64_t>::type
UpcastInt(Integer v) {
return v;
}
template <typename Integer>
typename std::enable_if<
std::is_integral<Integer>::value && std::is_unsigned<Integer>::value, uint64_t>::type
UpcastInt(Integer v) {
return v;
}
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,118 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <limits>
#include <type_traits>
#include "arrow/status.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
// "safe-math.h" includes <intsafe.h> from the Windows headers.
#include "arrow/util/windows_compatibility.h"
#include "arrow/vendored/portable-snippets/safe-math.h"
// clang-format off (avoid include reordering)
#include "arrow/util/windows_fixup.h"
// clang-format on
namespace arrow {
namespace internal {
// Define functions AddWithOverflow, SubtractWithOverflow, MultiplyWithOverflow
// with the signature `bool(T u, T v, T* out)` where T is an integer type.
// On overflow, these functions return true. Otherwise, false is returned
// and `out` is updated with the result of the operation.
#define OP_WITH_OVERFLOW(_func_name, _psnip_op, _type, _psnip_type) \
static inline bool _func_name(_type u, _type v, _type* out) { \
return !psnip_safe_##_psnip_type##_##_psnip_op(out, u, v); \
}
#define OPS_WITH_OVERFLOW(_func_name, _psnip_op) \
OP_WITH_OVERFLOW(_func_name, _psnip_op, int8_t, int8) \
OP_WITH_OVERFLOW(_func_name, _psnip_op, int16_t, int16) \
OP_WITH_OVERFLOW(_func_name, _psnip_op, int32_t, int32) \
OP_WITH_OVERFLOW(_func_name, _psnip_op, int64_t, int64) \
OP_WITH_OVERFLOW(_func_name, _psnip_op, uint8_t, uint8) \
OP_WITH_OVERFLOW(_func_name, _psnip_op, uint16_t, uint16) \
OP_WITH_OVERFLOW(_func_name, _psnip_op, uint32_t, uint32) \
OP_WITH_OVERFLOW(_func_name, _psnip_op, uint64_t, uint64)
OPS_WITH_OVERFLOW(AddWithOverflow, add)
OPS_WITH_OVERFLOW(SubtractWithOverflow, sub)
OPS_WITH_OVERFLOW(MultiplyWithOverflow, mul)
OPS_WITH_OVERFLOW(DivideWithOverflow, div)
#undef OP_WITH_OVERFLOW
#undef OPS_WITH_OVERFLOW
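// Illustrative sketch (assumed caller code): checked arithmetic with the
// generated helpers; `out` receives the result only when no overflow occurred.
//
//   int32_t out;
//   if (AddWithOverflow(std::numeric_limits<int32_t>::max(), int32_t{1}, &out)) {
//     // overflow detected, fall back to a wider type or raise an error
//   }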
// Define function NegateWithOverflow with the signature `bool(T u, T* out)`
// where T is a signed integer type. On overflow, these functions return true.
// Otherwise, false is returned and `out` is updated with the result of the
// operation.
#define UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, _type, _psnip_type) \
static inline bool _func_name(_type u, _type* out) { \
return !psnip_safe_##_psnip_type##_##_psnip_op(out, u); \
}
#define SIGNED_UNARY_OPS_WITH_OVERFLOW(_func_name, _psnip_op) \
UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int8_t, int8) \
UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int16_t, int16) \
UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int32_t, int32) \
UNARY_OP_WITH_OVERFLOW(_func_name, _psnip_op, int64_t, int64)
SIGNED_UNARY_OPS_WITH_OVERFLOW(NegateWithOverflow, neg)
#undef UNARY_OP_WITH_OVERFLOW
#undef SIGNED_UNARY_OPS_WITH_OVERFLOW
/// Signed addition with well-defined behaviour on overflow (as unsigned)
template <typename SignedInt>
SignedInt SafeSignedAdd(SignedInt u, SignedInt v) {
using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
return static_cast<SignedInt>(static_cast<UnsignedInt>(u) +
static_cast<UnsignedInt>(v));
}
/// Signed subtraction with well-defined behaviour on overflow (as unsigned)
template <typename SignedInt>
SignedInt SafeSignedSubtract(SignedInt u, SignedInt v) {
using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
return static_cast<SignedInt>(static_cast<UnsignedInt>(u) -
static_cast<UnsignedInt>(v));
}
/// Signed negation with well-defined behaviour on overflow (as unsigned)
template <typename SignedInt>
SignedInt SafeSignedNegate(SignedInt u) {
using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
return static_cast<SignedInt>(~static_cast<UnsignedInt>(u) + 1);
}
/// Signed left shift with well-defined behaviour on negative numbers or overflow
template <typename SignedInt, typename Shift>
SignedInt SafeLeftShift(SignedInt u, Shift shift) {
using UnsignedInt = typename std::make_unsigned<SignedInt>::type;
return static_cast<SignedInt>(static_cast<UnsignedInt>(u) << shift);
}
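// Illustrative sketch (assumed caller code): SafeSignedAdd wraps around on
// overflow (two's-complement behaviour) instead of triggering signed-overflow
// undefined behaviour.
//
//   int32_t wrapped = SafeSignedAdd<int32_t>(std::numeric_limits<int32_t>::max(), 1);
//   // wrapped == std::numeric_limits<int32_t>::min()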
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,420 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#ifndef _WIN32
#define ARROW_HAVE_SIGACTION 1
#endif
#include <atomic>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#if ARROW_HAVE_SIGACTION
#include <signal.h> // Needed for struct sigaction
#endif
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/windows_fixup.h"
namespace arrow {
namespace internal {
// NOTE: 8-bit path strings on Windows are encoded using UTF-8.
// Using MBCS would fail encoding some paths.
#if defined(_WIN32)
using NativePathString = std::wstring;
#else
using NativePathString = std::string;
#endif
class ARROW_EXPORT PlatformFilename {
public:
struct Impl;
~PlatformFilename();
PlatformFilename();
PlatformFilename(const PlatformFilename&);
PlatformFilename(PlatformFilename&&);
PlatformFilename& operator=(const PlatformFilename&);
PlatformFilename& operator=(PlatformFilename&&);
explicit PlatformFilename(NativePathString path);
explicit PlatformFilename(const NativePathString::value_type* path);
const NativePathString& ToNative() const;
std::string ToString() const;
PlatformFilename Parent() const;
Result<PlatformFilename> Real() const;
// These functions can fail for character encoding reasons.
static Result<PlatformFilename> FromString(const std::string& file_name);
Result<PlatformFilename> Join(const std::string& child_name) const;
PlatformFilename Join(const PlatformFilename& child_name) const;
bool operator==(const PlatformFilename& other) const;
bool operator!=(const PlatformFilename& other) const;
// Made public to avoid the proliferation of friend declarations.
const Impl* impl() const { return impl_.get(); }
private:
std::unique_ptr<Impl> impl_;
explicit PlatformFilename(Impl impl);
};
/// Create a directory if it doesn't exist.
///
/// Return whether the directory was created.
ARROW_EXPORT
Result<bool> CreateDir(const PlatformFilename& dir_path);
/// Create a directory and its parents if it doesn't exist.
///
/// Return whether the directory was created.
ARROW_EXPORT
Result<bool> CreateDirTree(const PlatformFilename& dir_path);
/// Delete a directory's contents (but not the directory itself) if it exists.
///
/// Return whether the directory existed.
ARROW_EXPORT
Result<bool> DeleteDirContents(const PlatformFilename& dir_path,
bool allow_not_found = true);
/// Delete a directory tree if it exists.
///
/// Return whether the directory existed.
ARROW_EXPORT
Result<bool> DeleteDirTree(const PlatformFilename& dir_path, bool allow_not_found = true);
// Non-recursively list the contents of the given directory.
// The returned names are the children's base names, not including dir_path.
ARROW_EXPORT
Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path);
/// Delete a file if it exists.
///
/// Return whether the file existed.
ARROW_EXPORT
Result<bool> DeleteFile(const PlatformFilename& file_path, bool allow_not_found = true);
/// Return whether a file exists.
ARROW_EXPORT
Result<bool> FileExists(const PlatformFilename& path);
// TODO expose this more publicly to make it available from io/file.h?
/// A RAII wrapper for a file descriptor.
///
/// The underlying file descriptor is automatically closed on destruction.
/// Moving is supported with well-defined semantics.
/// Furthermore, closing is idempotent.
class ARROW_EXPORT FileDescriptor {
public:
FileDescriptor() = default;
explicit FileDescriptor(int fd) : fd_(fd) {}
FileDescriptor(FileDescriptor&&);
FileDescriptor& operator=(FileDescriptor&&);
~FileDescriptor();
Status Close();
/// May return -1 if closed or default-initialized
int fd() const { return fd_.load(); }
/// Detach and return the underlying file descriptor
int Detach();
bool closed() const { return fd_.load() == -1; }
protected:
static void CloseFromDestructor(int fd);
std::atomic<int> fd_{-1};
};
/// Open a file for reading and return a file descriptor.
ARROW_EXPORT
Result<FileDescriptor> FileOpenReadable(const PlatformFilename& file_name);
/// Open a file for writing and return a file descriptor.
ARROW_EXPORT
Result<FileDescriptor> FileOpenWritable(const PlatformFilename& file_name,
bool write_only = true, bool truncate = true,
bool append = false);
/// Read from current file position. Return number of bytes read.
ARROW_EXPORT
Result<int64_t> FileRead(int fd, uint8_t* buffer, int64_t nbytes);
/// Read from given file position. Return number of bytes read.
ARROW_EXPORT
Result<int64_t> FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes);
ARROW_EXPORT
Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes);
ARROW_EXPORT
Status FileTruncate(int fd, const int64_t size);
ARROW_EXPORT
Status FileSeek(int fd, int64_t pos);
ARROW_EXPORT
Status FileSeek(int fd, int64_t pos, int whence);
ARROW_EXPORT
Result<int64_t> FileTell(int fd);
ARROW_EXPORT
Result<int64_t> FileGetSize(int fd);
ARROW_EXPORT
Status FileClose(int fd);
struct Pipe {
FileDescriptor rfd;
FileDescriptor wfd;
Status Close() { return rfd.Close() & wfd.Close(); }
};
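// Illustrative sketch (assumed caller code, error handling via
// ARROW_ASSIGN_OR_RAISE; the file name below is arbitrary): opening a file by
// UTF-8 name and reading a small buffer from it.
//
//   ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString("data.bin"));
//   ARROW_ASSIGN_OR_RAISE(FileDescriptor fd, FileOpenReadable(fn));
//   uint8_t buf[64];
//   ARROW_ASSIGN_OR_RAISE(int64_t n_read, FileRead(fd.fd(), buf, sizeof(buf)));
//   // fd closes its descriptor automatically when it goes out of scope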
ARROW_EXPORT
Result<Pipe> CreatePipe();
ARROW_EXPORT
Status SetPipeFileDescriptorNonBlocking(int fd);
class ARROW_EXPORT SelfPipe {
public:
static Result<std::shared_ptr<SelfPipe>> Make(bool signal_safe);
virtual ~SelfPipe();
/// \brief Wait for a wakeup.
///
/// Status::Invalid is returned if the pipe has been shutdown.
/// Otherwise the next sent payload is returned.
virtual Result<uint64_t> Wait() = 0;
/// \brief Wake up the pipe by sending a payload.
///
/// This method is async-signal-safe if `signal_safe` was set to true.
virtual void Send(uint64_t payload) = 0;
/// \brief Wake up the pipe and shut it down.
virtual Status Shutdown() = 0;
};
ARROW_EXPORT
int64_t GetPageSize();
struct MemoryRegion {
void* addr;
size_t size;
};
ARROW_EXPORT
Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes,
void** new_addr);
ARROW_EXPORT
Status MemoryAdviseWillNeed(const std::vector<MemoryRegion>& regions);
ARROW_EXPORT
Result<std::string> GetEnvVar(const char* name);
ARROW_EXPORT
Result<std::string> GetEnvVar(const std::string& name);
ARROW_EXPORT
Result<NativePathString> GetEnvVarNative(const char* name);
ARROW_EXPORT
Result<NativePathString> GetEnvVarNative(const std::string& name);
ARROW_EXPORT
Status SetEnvVar(const char* name, const char* value);
ARROW_EXPORT
Status SetEnvVar(const std::string& name, const std::string& value);
ARROW_EXPORT
Status DelEnvVar(const char* name);
ARROW_EXPORT
Status DelEnvVar(const std::string& name);
ARROW_EXPORT
std::string ErrnoMessage(int errnum);
#if _WIN32
ARROW_EXPORT
std::string WinErrorMessage(int errnum);
#endif
ARROW_EXPORT
std::shared_ptr<StatusDetail> StatusDetailFromErrno(int errnum);
#if _WIN32
ARROW_EXPORT
std::shared_ptr<StatusDetail> StatusDetailFromWinError(int errnum);
#endif
ARROW_EXPORT
std::shared_ptr<StatusDetail> StatusDetailFromSignal(int signum);
template <typename... Args>
Status StatusFromErrno(int errnum, StatusCode code, Args&&... args) {
return Status::FromDetailAndArgs(code, StatusDetailFromErrno(errnum),
std::forward<Args>(args)...);
}
template <typename... Args>
Status IOErrorFromErrno(int errnum, Args&&... args) {
return StatusFromErrno(errnum, StatusCode::IOError, std::forward<Args>(args)...);
}
#if _WIN32
template <typename... Args>
Status StatusFromWinError(int errnum, StatusCode code, Args&&... args) {
return Status::FromDetailAndArgs(code, StatusDetailFromWinError(errnum),
std::forward<Args>(args)...);
}
template <typename... Args>
Status IOErrorFromWinError(int errnum, Args&&... args) {
return StatusFromWinError(errnum, StatusCode::IOError, std::forward<Args>(args)...);
}
#endif
template <typename... Args>
Status StatusFromSignal(int signum, StatusCode code, Args&&... args) {
return Status::FromDetailAndArgs(code, StatusDetailFromSignal(signum),
std::forward<Args>(args)...);
}
template <typename... Args>
Status CancelledFromSignal(int signum, Args&&... args) {
return StatusFromSignal(signum, StatusCode::Cancelled, std::forward<Args>(args)...);
}
ARROW_EXPORT
int ErrnoFromStatus(const Status&);
// Always returns 0 on non-Windows platforms (for Python).
ARROW_EXPORT
int WinErrorFromStatus(const Status&);
ARROW_EXPORT
int SignalFromStatus(const Status&);
class ARROW_EXPORT TemporaryDir {
public:
~TemporaryDir();
/// '/'-terminated path to the temporary dir
const PlatformFilename& path() { return path_; }
/// Create a temporary subdirectory in the system temporary dir,
/// named starting with `prefix`.
static Result<std::unique_ptr<TemporaryDir>> Make(const std::string& prefix);
private:
PlatformFilename path_;
explicit TemporaryDir(PlatformFilename&&);
};
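// Illustrative sketch (assumed caller code; the prefix and child name are
// arbitrary): creating a scoped temporary directory and composing a child
// path inside it.
//
//   ARROW_ASSIGN_OR_RAISE(auto tmp_dir, TemporaryDir::Make("arrow-example-"));
//   ARROW_ASSIGN_OR_RAISE(auto child, tmp_dir->path().Join("scratch.bin"));
//   // the temporary directory is cleaned up when `tmp_dir` is destroyed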
class ARROW_EXPORT SignalHandler {
public:
typedef void (*Callback)(int);
SignalHandler();
explicit SignalHandler(Callback cb);
#if ARROW_HAVE_SIGACTION
explicit SignalHandler(const struct sigaction& sa);
#endif
Callback callback() const;
#if ARROW_HAVE_SIGACTION
const struct sigaction& action() const;
#endif
protected:
#if ARROW_HAVE_SIGACTION
// Storing the full sigaction allows to restore the entire signal handling
// configuration.
struct sigaction sa_;
#else
Callback cb_;
#endif
};
/// \brief Return the current handler for the given signal number.
ARROW_EXPORT
Result<SignalHandler> GetSignalHandler(int signum);
/// \brief Set a new handler for the given signal number.
///
/// The old signal handler is returned.
ARROW_EXPORT
Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler);
/// \brief Reinstate the signal handler
///
/// For use in signal handlers. This is needed on platforms without sigaction()
/// such as Windows, as the default signal handler is restored there as
/// soon as a signal is raised.
ARROW_EXPORT
void ReinstateSignalHandler(int signum, SignalHandler::Callback handler);
/// \brief Send a signal to the current process
///
/// The thread which will receive the signal is unspecified.
ARROW_EXPORT
Status SendSignal(int signum);
/// \brief Send a signal to the given thread
///
/// This function isn't supported on Windows.
ARROW_EXPORT
Status SendSignalToThread(int signum, uint64_t thread_id);
/// \brief Get an unpredictable random seed
///
/// This function may be slightly costly, so should only be used to initialize
/// a PRNG, not to generate a large amount of random numbers.
/// It is better to use this function rather than std::random_device, unless
/// absolutely necessary (e.g. to generate a cryptographic secret).
ARROW_EXPORT
int64_t GetRandomSeed();
/// \brief Get the current thread id
///
/// In addition to having the same properties as std::thread, the returned value
/// is a regular integer value, which is more convenient than an opaque type.
ARROW_EXPORT
uint64_t GetThreadId();
/// \brief Get the current memory used by the current process in bytes
///
/// This function supports Windows, Linux, and Mac and will return 0 otherwise
ARROW_EXPORT
int64_t GetCurrentRSS();
/// \brief Get the total memory available to the system in bytes
///
/// This function supports Windows, Linux, and Mac and will return 0 otherwise
ARROW_EXPORT
int64_t GetTotalMemoryBytes();
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,568 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cassert>
#include <functional>
#include <memory>
#include <optional>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/compare.h"
#include "arrow/util/functional.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
template <typename T>
class Iterator;
template <typename T>
struct IterationTraits {
/// \brief a reserved value which indicates the end of iteration. By
/// default this is NULLPTR since most iterators yield pointer types.
/// Specialize IterationTraits if different end semantics are required.
///
/// Note: This should not be used to determine if a given value is a
/// terminal value. Use IsIterationEnd (which uses IsEnd) instead. This
/// is only for returning terminal values.
static T End() { return T(NULLPTR); }
/// \brief Checks to see if the value is a terminal value.
/// A method is used here since T is not necessarily comparable in many
/// cases even though it has a distinct final value
static bool IsEnd(const T& val) { return val == End(); }
};
template <typename T>
T IterationEnd() {
return IterationTraits<T>::End();
}
template <typename T>
bool IsIterationEnd(const T& val) {
return IterationTraits<T>::IsEnd(val);
}
template <typename T>
struct IterationTraits<std::optional<T>> {
/// \brief by default when iterating through a sequence of optional,
/// nullopt indicates the end of iteration.
/// Specialize IterationTraits if different end semantics are required.
static std::optional<T> End() { return std::nullopt; }
/// \brief by default when iterating through a sequence of optional,
/// nullopt (!has_value()) indicates the end of iteration.
/// Specialize IterationTraits if different end semantics are required.
static bool IsEnd(const std::optional<T>& val) { return !val.has_value(); }
// TODO(bkietz) The range-for loop over Iterator<optional<T>> yields
// Result<optional<T>>, which is unnecessary (since only the unyielded end optional
// is nullopt). Add IterationTraits::GetRangeElement() to handle this case.
};
/// \brief A generic Iterator that can return errors
template <typename T>
class Iterator : public util::EqualityComparable<Iterator<T>> {
public:
/// \brief Iterator may be constructed from any type which has a member function
/// with signature Result<T> Next();
/// End of iteration is signalled by returning IterationTraits<T>::End();
///
/// The argument is moved or copied to the heap and kept in a unique_ptr<void>. Only
/// its destructor and its Next method (which are stored in function pointers) are
/// referenced after construction.
///
/// This approach is used to dodge MSVC linkage hell (ARROW-6244, ARROW-6558) when using
/// an abstract template base class: instead of being inlined as usual for a template
/// function the base's virtual destructor will be exported, leading to multiple
/// definition errors when linking to any other TU where the base is instantiated.
template <typename Wrapped>
explicit Iterator(Wrapped has_next)
: ptr_(new Wrapped(std::move(has_next)), Delete<Wrapped>), next_(Next<Wrapped>) {}
Iterator() : ptr_(NULLPTR, [](void*) {}) {}
/// \brief Return the next element of the sequence, IterationTraits<T>::End() when the
/// iteration is completed. Calling this on a default constructed Iterator
/// will result in undefined behavior.
Result<T> Next() { return next_(ptr_.get()); }
/// Pass each element of the sequence to a visitor. Will return any error status
/// returned by the visitor, terminating iteration.
template <typename Visitor>
Status Visit(Visitor&& visitor) {
for (;;) {
ARROW_ASSIGN_OR_RAISE(auto value, Next());
if (IsIterationEnd(value)) break;
ARROW_RETURN_NOT_OK(visitor(std::move(value)));
}
return Status::OK();
}
/// Iterators will only compare equal if they are both null.
/// Equality comparability is required to make an Iterator of Iterators
/// (to check for the end condition).
bool Equals(const Iterator& other) const { return ptr_ == other.ptr_; }
explicit operator bool() const { return ptr_ != NULLPTR; }
class RangeIterator {
public:
RangeIterator() : value_(IterationTraits<T>::End()) {}
explicit RangeIterator(Iterator i)
: value_(IterationTraits<T>::End()),
iterator_(std::make_shared<Iterator>(std::move(i))) {
Next();
}
bool operator!=(const RangeIterator& other) const { return value_ != other.value_; }
RangeIterator& operator++() {
Next();
return *this;
}
Result<T> operator*() {
ARROW_RETURN_NOT_OK(value_.status());
auto value = std::move(value_);
value_ = IterationTraits<T>::End();
return value;
}
private:
void Next() {
if (!value_.ok()) {
value_ = IterationTraits<T>::End();
return;
}
value_ = iterator_->Next();
}
Result<T> value_;
std::shared_ptr<Iterator> iterator_;
};
RangeIterator begin() { return RangeIterator(std::move(*this)); }
RangeIterator end() { return RangeIterator(); }
/// \brief Move every element of this iterator into a vector.
Result<std::vector<T>> ToVector() {
std::vector<T> out;
for (auto maybe_element : *this) {
ARROW_ASSIGN_OR_RAISE(auto element, maybe_element);
out.push_back(std::move(element));
}
// ARROW-8193: On gcc-4.8 without the explicit move it tries to use the
// copy constructor, which may be deleted on the elements of type T
return std::move(out);
}
private:
/// Implementation of deleter for ptr_: Casts from void* to the wrapped type and
/// deletes that.
template <typename HasNext>
static void Delete(void* ptr) {
delete static_cast<HasNext*>(ptr);
}
/// Implementation of Next: Casts from void* to the wrapped type and invokes that
/// type's Next member function.
template <typename HasNext>
static Result<T> Next(void* ptr) {
return static_cast<HasNext*>(ptr)->Next();
}
/// ptr_ is a unique_ptr to void with a custom deleter: a function pointer which first
/// casts from void* to a pointer to the wrapped type then deletes that.
std::unique_ptr<void, void (*)(void*)> ptr_;
/// next_ is a function pointer which first casts from void* to a pointer to the wrapped
/// type then invokes its Next member function.
Result<T> (*next_)(void*) = NULLPTR;
};
template <typename T>
struct TransformFlow {
using YieldValueType = T;
TransformFlow(YieldValueType value, bool ready_for_next)
: finished_(false),
ready_for_next_(ready_for_next),
yield_value_(std::move(value)) {}
TransformFlow(bool finished, bool ready_for_next)
: finished_(finished), ready_for_next_(ready_for_next), yield_value_() {}
bool HasValue() const { return yield_value_.has_value(); }
bool Finished() const { return finished_; }
bool ReadyForNext() const { return ready_for_next_; }
T Value() const { return *yield_value_; }
bool finished_ = false;
bool ready_for_next_ = false;
std::optional<YieldValueType> yield_value_;
};
struct TransformFinish {
template <typename T>
operator TransformFlow<T>() && { // NOLINT explicit
return TransformFlow<T>(true, true);
}
};
struct TransformSkip {
template <typename T>
operator TransformFlow<T>() && { // NOLINT explicit
return TransformFlow<T>(false, true);
}
};
template <typename T>
TransformFlow<T> TransformYield(T value = {}, bool ready_for_next = true) {
return TransformFlow<T>(std::move(value), ready_for_next);
}
template <typename T, typename V>
using Transformer = std::function<Result<TransformFlow<V>>(T)>;
template <typename T, typename V>
class TransformIterator {
public:
explicit TransformIterator(Iterator<T> it, Transformer<T, V> transformer)
: it_(std::move(it)),
transformer_(std::move(transformer)),
last_value_(),
finished_() {}
Result<V> Next() {
while (!finished_) {
ARROW_ASSIGN_OR_RAISE(std::optional<V> next, Pump());
if (next.has_value()) {
return std::move(*next);
}
ARROW_ASSIGN_OR_RAISE(last_value_, it_.Next());
}
return IterationTraits<V>::End();
}
private:
// Calls the transform function on the current value. Can return in several ways
// * If the next value is requested (e.g. skip) it will return an empty optional
// * If an invalid status is encountered that will be returned
// * If finished it will return IterationTraits<V>::End()
// * If a value is returned by the transformer that will be returned
Result<std::optional<V>> Pump() {
if (!finished_ && last_value_.has_value()) {
auto next_res = transformer_(*last_value_);
if (!next_res.ok()) {
finished_ = true;
return next_res.status();
}
auto next = *next_res;
if (next.ReadyForNext()) {
if (IsIterationEnd(*last_value_)) {
finished_ = true;
}
last_value_.reset();
}
if (next.Finished()) {
finished_ = true;
}
if (next.HasValue()) {
return next.Value();
}
}
if (finished_) {
return IterationTraits<V>::End();
}
return std::nullopt;
}
Iterator<T> it_;
Transformer<T, V> transformer_;
std::optional<T> last_value_;
bool finished_ = false;
};
/// \brief Transforms an iterator according to a transformer, returning a new Iterator.
///
/// The transformer will be called on each element of the source iterator and for each
/// call it can yield a value, skip, or finish the iteration. When yielding a value the
/// transformer can choose to consume the source item (the default, ready_for_next = true)
/// or to keep it, in which case it will be called again on the same value.
///
/// This is essentially a more generic form of the map operation that can return 0, 1, or
/// many values for each of the source items.
///
/// The transformer will be exposed to the end of the source sequence
/// (IterationTraits::End) in case it needs to emit some final item(s) before finishing.
///
/// Any invalid status returned by the transformer will be returned immediately.
template <typename T, typename V>
Iterator<V> MakeTransformedIterator(Iterator<T> it, Transformer<T, V> op) {
return Iterator<V>(TransformIterator<T, V>(std::move(it), std::move(op)));
}
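// Illustrative sketch (assumed caller code): a transformer that keeps every
// other element of a std::optional<int> stream. TransformYield consumes the
// source item and yields it, TransformSkip consumes it without yielding, and
// TransformFinish ends the output sequence.
//
//   Transformer<std::optional<int>, std::optional<int>> every_other =
//       [keep = false](std::optional<int> v) mutable
//       -> Result<TransformFlow<std::optional<int>>> {
//     if (!v.has_value()) return TransformFinish();  // source exhausted
//     keep = !keep;
//     if (keep) return TransformYield(v);
//     return TransformSkip();
//   };
//   auto it = MakeTransformedIterator(
//       MakeVectorIterator<std::optional<int>>({1, 2, 3, 4}), every_other);
//   // yields 1 and 3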
template <typename T>
struct IterationTraits<Iterator<T>> {
// The end condition for an Iterator of Iterators is a default constructed (null)
// Iterator.
static Iterator<T> End() { return Iterator<T>(); }
static bool IsEnd(const Iterator<T>& val) { return !val; }
};
template <typename Fn, typename T>
class FunctionIterator {
public:
explicit FunctionIterator(Fn fn) : fn_(std::move(fn)) {}
Result<T> Next() { return fn_(); }
private:
Fn fn_;
};
/// \brief Construct an Iterator which invokes a callable on Next()
template <typename Fn,
typename Ret = typename internal::call_traits::return_type<Fn>::ValueType>
Iterator<Ret> MakeFunctionIterator(Fn fn) {
return Iterator<Ret>(FunctionIterator<Fn, Ret>(std::move(fn)));
}
template <typename T>
Iterator<T> MakeEmptyIterator() {
return MakeFunctionIterator([]() -> Result<T> { return IterationTraits<T>::End(); });
}
template <typename T>
Iterator<T> MakeErrorIterator(Status s) {
return MakeFunctionIterator([s]() -> Result<T> {
ARROW_RETURN_NOT_OK(s);
return IterationTraits<T>::End();
});
}
/// \brief Simple iterator which yields the elements of a std::vector
template <typename T>
class VectorIterator {
public:
explicit VectorIterator(std::vector<T> v) : elements_(std::move(v)) {}
Result<T> Next() {
if (i_ == elements_.size()) {
return IterationTraits<T>::End();
}
return std::move(elements_[i_++]);
}
private:
std::vector<T> elements_;
size_t i_ = 0;
};
template <typename T>
Iterator<T> MakeVectorIterator(std::vector<T> v) {
return Iterator<T>(VectorIterator<T>(std::move(v)));
}
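// Illustrative sketch (assumed caller code): iterating a vector through the
// Iterator interface. std::optional<int> is used as the element type so that
// the default end sentinel (nullopt) cannot collide with a real value.
//
//   auto it = MakeVectorIterator<std::optional<int>>({1, 2, 3});
//   ARROW_RETURN_NOT_OK(it.Visit([](std::optional<int> v) {
//     // use *v
//     return Status::OK();
//   }));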
/// \brief Simple iterator which yields *pointers* to the elements of a std::vector<T>.
/// This is provided to support T where IterationTraits<T>::End is not specialized
template <typename T>
class VectorPointingIterator {
public:
explicit VectorPointingIterator(std::vector<T> v) : elements_(std::move(v)) {}
Result<T*> Next() {
if (i_ == elements_.size()) {
return NULLPTR;
}
return &elements_[i_++];
}
private:
std::vector<T> elements_;
size_t i_ = 0;
};
template <typename T>
Iterator<T*> MakeVectorPointingIterator(std::vector<T> v) {
return Iterator<T*>(VectorPointingIterator<T>(std::move(v)));
}
/// \brief MapIterator takes ownership of an iterator and a function to apply
/// on every element. The mapped function is not allowed to fail.
template <typename Fn, typename I, typename O>
class MapIterator {
public:
explicit MapIterator(Fn map, Iterator<I> it)
: map_(std::move(map)), it_(std::move(it)) {}
Result<O> Next() {
ARROW_ASSIGN_OR_RAISE(I i, it_.Next());
if (IsIterationEnd(i)) {
return IterationTraits<O>::End();
}
return map_(std::move(i));
}
private:
Fn map_;
Iterator<I> it_;
};
/// \brief MapIterator takes ownership of an iterator and a function to apply
/// on every element. The mapped function is not allowed to fail.
template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
typename To = internal::call_traits::return_type<Fn>>
Iterator<To> MakeMapIterator(Fn map, Iterator<From> it) {
return Iterator<To>(MapIterator<Fn, From, To>(std::move(map), std::move(it)));
}
/// \brief Like MapIterator, but where the function can fail.
template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
typename To = typename internal::call_traits::return_type<Fn>::ValueType>
Iterator<To> MakeMaybeMapIterator(Fn map, Iterator<From> it) {
return Iterator<To>(MapIterator<Fn, From, To>(std::move(map), std::move(it)));
}
struct FilterIterator {
enum Action { ACCEPT, REJECT };
template <typename To>
static Result<std::pair<To, Action>> Reject() {
return std::make_pair(IterationTraits<To>::End(), REJECT);
}
template <typename To>
static Result<std::pair<To, Action>> Accept(To out) {
return std::make_pair(std::move(out), ACCEPT);
}
template <typename To>
static Result<std::pair<To, Action>> MaybeAccept(Result<To> maybe_out) {
return std::move(maybe_out).Map(Accept<To>);
}
template <typename To>
static Result<std::pair<To, Action>> Error(Status s) {
return s;
}
template <typename Fn, typename From, typename To>
class Impl {
public:
explicit Impl(Fn filter, Iterator<From> it) : filter_(filter), it_(std::move(it)) {}
Result<To> Next() {
To out = IterationTraits<To>::End();
Action action;
for (;;) {
ARROW_ASSIGN_OR_RAISE(From i, it_.Next());
if (IsIterationEnd(i)) {
return IterationTraits<To>::End();
}
ARROW_ASSIGN_OR_RAISE(std::tie(out, action), filter_(std::move(i)));
if (action == ACCEPT) return out;
}
}
private:
Fn filter_;
Iterator<From> it_;
};
};
/// \brief Like MapIterator, but where the function can fail or reject elements.
template <
typename Fn, typename From = typename internal::call_traits::argument_type<0, Fn>,
typename Ret = typename internal::call_traits::return_type<Fn>::ValueType,
typename To = typename std::tuple_element<0, Ret>::type,
typename Enable = typename std::enable_if<std::is_same<
typename std::tuple_element<1, Ret>::type, FilterIterator::Action>::value>::type>
Iterator<To> MakeFilterIterator(Fn filter, Iterator<From> it) {
return Iterator<To>(
FilterIterator::Impl<Fn, From, To>(std::move(filter), std::move(it)));
}
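// Illustrative sketch (assumed caller code): keep only the odd elements of a
// std::optional<int> stream; the filter returns an (element, Action) pair.
// The filter is never called on the end sentinel, so dereferencing is safe.
//
//   auto odd_only = [](std::optional<int> v)
//       -> Result<std::pair<std::optional<int>, FilterIterator::Action>> {
//     if (*v % 2 != 0) return FilterIterator::Accept(v);
//     return FilterIterator::Reject<std::optional<int>>();
//   };
//   auto it = MakeFilterIterator(std::move(odd_only),
//                                MakeVectorIterator<std::optional<int>>({1, 2, 3}));
//   // yields 1 and 3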
/// \brief FlattenIterator takes an iterator generating iterators and yields a
/// unified iterator that flattens/concatenates in a single stream.
template <typename T>
class FlattenIterator {
public:
explicit FlattenIterator(Iterator<Iterator<T>> it) : parent_(std::move(it)) {}
Result<T> Next() {
if (IsIterationEnd(child_)) {
// Pop from parent's iterator.
ARROW_ASSIGN_OR_RAISE(child_, parent_.Next());
// Check if final iteration reached.
if (IsIterationEnd(child_)) {
return IterationTraits<T>::End();
}
return Next();
}
// Pop from child_ and check for depletion.
ARROW_ASSIGN_OR_RAISE(T out, child_.Next());
if (IsIterationEnd(out)) {
// Reset state such that we pop from parent on the recursive call
child_ = IterationTraits<Iterator<T>>::End();
return Next();
}
return out;
}
private:
Iterator<Iterator<T>> parent_;
Iterator<T> child_ = IterationTraits<Iterator<T>>::End();
};
template <typename T>
Iterator<T> MakeFlattenIterator(Iterator<Iterator<T>> it) {
return Iterator<T>(FlattenIterator<T>(std::move(it)));
}
template <typename Reader>
Iterator<typename Reader::ValueType> MakeIteratorFromReader(
const std::shared_ptr<Reader>& reader) {
return MakeFunctionIterator([reader] { return reader->Next(); });
}
} // namespace arrow

View File

@@ -0,0 +1,98 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
/// \brief A container for key-value pair type metadata. Not thread-safe
class ARROW_EXPORT KeyValueMetadata {
public:
KeyValueMetadata();
KeyValueMetadata(std::vector<std::string> keys, std::vector<std::string> values);
explicit KeyValueMetadata(const std::unordered_map<std::string, std::string>& map);
static std::shared_ptr<KeyValueMetadata> Make(std::vector<std::string> keys,
std::vector<std::string> values);
void ToUnorderedMap(std::unordered_map<std::string, std::string>* out) const;
void Append(std::string key, std::string value);
Result<std::string> Get(const std::string& key) const;
bool Contains(const std::string& key) const;
// Note that deleting may invalidate known indices
Status Delete(const std::string& key);
Status Delete(int64_t index);
Status DeleteMany(std::vector<int64_t> indices);
Status Set(const std::string& key, const std::string& value);
void reserve(int64_t n);
int64_t size() const;
const std::string& key(int64_t i) const;
const std::string& value(int64_t i) const;
const std::vector<std::string>& keys() const { return keys_; }
const std::vector<std::string>& values() const { return values_; }
std::vector<std::pair<std::string, std::string>> sorted_pairs() const;
/// \brief Perform linear search for key, returning -1 if not found
int FindKey(const std::string& key) const;
std::shared_ptr<KeyValueMetadata> Copy() const;
/// \brief Return a new KeyValueMetadata by combining the passed metadata
/// with this KeyValueMetadata. Colliding keys will be overridden by the
/// passed metadata. Assumes keys in both containers are unique
std::shared_ptr<KeyValueMetadata> Merge(const KeyValueMetadata& other) const;
bool Equals(const KeyValueMetadata& other) const;
std::string ToString() const;
private:
std::vector<std::string> keys_;
std::vector<std::string> values_;
ARROW_DISALLOW_COPY_AND_ASSIGN(KeyValueMetadata);
};
/// \brief Create a KeyValueMetadata instance
///
/// \param pairs key-value mapping
ARROW_EXPORT std::shared_ptr<KeyValueMetadata> key_value_metadata(
const std::unordered_map<std::string, std::string>& pairs);
/// \brief Create a KeyValueMetadata instance
///
/// \param keys sequence of metadata keys
/// \param values sequence of corresponding metadata values
ARROW_EXPORT std::shared_ptr<KeyValueMetadata> key_value_metadata(
std::vector<std::string> keys, std::vector<std::string> values);
} // namespace arrow
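A minimal usage sketch of the metadata API above (the function name and the include path are illustrative, not part of this header): build an instance from parallel key/value vectors, append a pair, and look it up.

#include <iostream>
#include "arrow/util/key_value_metadata.h"  // assumed include path for this header

void KeyValueMetadataExample() {
  // Build from parallel key/value vectors, then append another pair.
  auto metadata = arrow::key_value_metadata({"origin", "rows"}, {"sensor-a", "1024"});
  metadata->Append("compression", "zstd");
  // Contains() is a cheap existence check; Get() returns a Result<std::string>.
  if (metadata->Contains("compression")) {
    std::cout << metadata->Get("compression").ValueOrDie() << std::endl;  // zstd
  }
  std::cout << metadata->ToString() << std::endl;
}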

View File

@@ -0,0 +1,35 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <new>
namespace arrow {
namespace internal {
#if __cpp_lib_launder
using std::launder;
#else
template <class T>
constexpr T* launder(T* p) noexcept {
return p;
}
#endif
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,259 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#ifdef GANDIVA_IR
// The LLVM IR code doesn't have an NDEBUG mode, and it shouldn't reference streams or
// the C++ standard library. So the DCHECK macros are turned into no-ops in that case.
#define ARROW_IGNORE_EXPR(expr) ((void)(expr))
#define DCHECK(condition) ARROW_IGNORE_EXPR(condition)
#define DCHECK_OK(status) ARROW_IGNORE_EXPR(status)
#define DCHECK_EQ(val1, val2) ARROW_IGNORE_EXPR(val1)
#define DCHECK_NE(val1, val2) ARROW_IGNORE_EXPR(val1)
#define DCHECK_LE(val1, val2) ARROW_IGNORE_EXPR(val1)
#define DCHECK_LT(val1, val2) ARROW_IGNORE_EXPR(val1)
#define DCHECK_GE(val1, val2) ARROW_IGNORE_EXPR(val1)
#define DCHECK_GT(val1, val2) ARROW_IGNORE_EXPR(val1)
#else // !GANDIVA_IR
#include <memory>
#include <ostream>
#include <string>
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace util {
enum class ArrowLogLevel : int {
ARROW_DEBUG = -1,
ARROW_INFO = 0,
ARROW_WARNING = 1,
ARROW_ERROR = 2,
ARROW_FATAL = 3
};
#define ARROW_LOG_INTERNAL(level) ::arrow::util::ArrowLog(__FILE__, __LINE__, level)
#define ARROW_LOG(level) ARROW_LOG_INTERNAL(::arrow::util::ArrowLogLevel::ARROW_##level)
#define ARROW_IGNORE_EXPR(expr) ((void)(expr))
#define ARROW_CHECK_OR_LOG(condition, level) \
ARROW_PREDICT_TRUE(condition) \
? ARROW_IGNORE_EXPR(0) \
: ::arrow::util::Voidify() & ARROW_LOG(level) << " Check failed: " #condition " "
#define ARROW_CHECK(condition) ARROW_CHECK_OR_LOG(condition, FATAL)
// If 'to_call' returns a bad status, CHECK immediately with a logged message
// of 'msg' followed by the status.
#define ARROW_CHECK_OK_PREPEND(to_call, msg, level) \
do { \
::arrow::Status _s = (to_call); \
ARROW_CHECK_OR_LOG(_s.ok(), level) \
<< "Operation failed: " << ARROW_STRINGIFY(to_call) << "\n" \
<< (msg) << ": " << _s.ToString(); \
} while (false)
// If the status is bad, CHECK immediately, appending the status to the
// logged message.
#define ARROW_CHECK_OK(s) ARROW_CHECK_OK_PREPEND(s, "Bad status", FATAL)
#define ARROW_CHECK_EQ(val1, val2) ARROW_CHECK((val1) == (val2))
#define ARROW_CHECK_NE(val1, val2) ARROW_CHECK((val1) != (val2))
#define ARROW_CHECK_LE(val1, val2) ARROW_CHECK((val1) <= (val2))
#define ARROW_CHECK_LT(val1, val2) ARROW_CHECK((val1) < (val2))
#define ARROW_CHECK_GE(val1, val2) ARROW_CHECK((val1) >= (val2))
#define ARROW_CHECK_GT(val1, val2) ARROW_CHECK((val1) > (val2))
#ifdef NDEBUG
#define ARROW_DFATAL ::arrow::util::ArrowLogLevel::ARROW_WARNING
// CAUTION: DCHECK_OK() always evaluates its argument, but other DCHECK*() macros
// only do so in debug mode.
#define ARROW_DCHECK(condition) \
while (false) ARROW_IGNORE_EXPR(condition); \
while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_OK(s) \
ARROW_IGNORE_EXPR(s); \
while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_EQ(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_NE(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_LE(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_LT(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_GE(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
#define ARROW_DCHECK_GT(val1, val2) \
while (false) ARROW_IGNORE_EXPR(val1); \
while (false) ARROW_IGNORE_EXPR(val2); \
while (false) ::arrow::util::detail::NullLog()
#else
#define ARROW_DFATAL ::arrow::util::ArrowLogLevel::ARROW_FATAL
#define ARROW_DCHECK ARROW_CHECK
#define ARROW_DCHECK_OK ARROW_CHECK_OK
#define ARROW_DCHECK_EQ ARROW_CHECK_EQ
#define ARROW_DCHECK_NE ARROW_CHECK_NE
#define ARROW_DCHECK_LE ARROW_CHECK_LE
#define ARROW_DCHECK_LT ARROW_CHECK_LT
#define ARROW_DCHECK_GE ARROW_CHECK_GE
#define ARROW_DCHECK_GT ARROW_CHECK_GT
#endif // NDEBUG
#define DCHECK ARROW_DCHECK
#define DCHECK_OK ARROW_DCHECK_OK
#define DCHECK_EQ ARROW_DCHECK_EQ
#define DCHECK_NE ARROW_DCHECK_NE
#define DCHECK_LE ARROW_DCHECK_LE
#define DCHECK_LT ARROW_DCHECK_LT
#define DCHECK_GE ARROW_DCHECK_GE
#define DCHECK_GT ARROW_DCHECK_GT
// This code is adapted from
// https://github.com/ray-project/ray/blob/master/src/ray/util/logging.h.
// To keep the logging library pluggable and hide the implementation from users,
// ArrowLog is only a declaration; the implementation lives in logging.cc.
// In logging.cc, different logging backends can be selected with different macros.
// There is also a null log variant which does not output anything.
class ARROW_EXPORT ArrowLogBase {
public:
virtual ~ArrowLogBase() {}
virtual bool IsEnabled() const { return false; }
template <typename T>
ArrowLogBase& operator<<(const T& t) {
if (IsEnabled()) {
Stream() << t;
}
return *this;
}
protected:
virtual std::ostream& Stream() = 0;
};
class ARROW_EXPORT ArrowLog : public ArrowLogBase {
public:
ArrowLog(const char* file_name, int line_number, ArrowLogLevel severity);
~ArrowLog() override;
/// Return whether or not the current logging instance is enabled.
///
/// \return True if logging is enabled and false otherwise.
bool IsEnabled() const override;
/// Initialize the Arrow log for a program. This should be called only once.
///
/// \param appName The name of the application that starts the log.
/// \param severity_threshold Logging threshold for the program.
/// \param logDir Logging output file name. If empty, the log won't be written to a file.
static void StartArrowLog(const std::string& appName,
ArrowLogLevel severity_threshold = ArrowLogLevel::ARROW_INFO,
const std::string& logDir = "");
/// Shut down the Arrow log. It should be paired with a call to StartArrowLog.
static void ShutDownArrowLog();
/// Install the failure signal handler to output the call stack on a crash.
/// If glog is not installed, this function does nothing.
static void InstallFailureSignalHandler();
/// Uninstall the signal actions installed by InstallFailureSignalHandler.
static void UninstallSignalAction();
/// Return whether or not the log level is enabled in the current settings.
///
/// \param log_level The input log level to test.
/// \return True if the input log level is not lower than the threshold.
static bool IsLevelEnabled(ArrowLogLevel log_level);
private:
ARROW_DISALLOW_COPY_AND_ASSIGN(ArrowLog);
// Hide the logging provider implementation behind a void*.
// Otherwise, library users might have to define the same macros in order to pull in
// the correct header file.
void* logging_provider_;
/// True if log messages should be logged and false if they should be ignored.
bool is_enabled_;
static ArrowLogLevel severity_threshold_;
protected:
std::ostream& Stream() override;
};
// This class makes ARROW_CHECK compile by turning the << operator chain into void.
// It is copied from glog.
class ARROW_EXPORT Voidify {
public:
Voidify() {}
// This has to be an operator with a precedence lower than << but
// higher than ?:
void operator&(ArrowLogBase&) {}
};
namespace detail {
/// @brief A helper for the nil log sink.
///
/// Using this helper is analogous to sending log messages to /dev/null:
/// nothing gets logged.
class NullLog {
public:
/// The no-op output operator.
///
/// @param [in] t
/// The object to send into the nil sink.
/// @return Reference to the updated object.
template <class T>
NullLog& operator<<(const T& t) {
return *this;
}
};
} // namespace detail
} // namespace util
} // namespace arrow
#endif // GANDIVA_IR
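A short, hedged sketch of how these macros are typically used (the function names are made up for illustration): ARROW_CHECK* fires in all builds, DCHECK* compiles away under NDEBUG, and ARROW_CHECK_OK aborts with the stringified expression plus the failed Status.

#include "arrow/status.h"
#include "arrow/util/logging.h"  // assumed include path for this header

arrow::Status DivideExact(int a, int b, int* out) {
  ARROW_CHECK_NE(b, 0) << "division by zero";      // checked in all builds
  DCHECK_EQ(a % b, 0) << "not an exact multiple";  // debug builds only
  *out = a / b;
  return arrow::Status::OK();
}

void LoggingExample() {
  ARROW_LOG(INFO) << "starting";
  int result = 0;
  ARROW_CHECK_OK(DivideExact(10, 2, &result));
  ARROW_LOG(WARNING) << "result = " << result;
}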

View File

@@ -0,0 +1,191 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#define ARROW_EXPAND(x) x
#define ARROW_STRINGIFY(x) #x
#define ARROW_CONCAT(x, y) x##y
// From Google gutil
#ifndef ARROW_DISALLOW_COPY_AND_ASSIGN
#define ARROW_DISALLOW_COPY_AND_ASSIGN(TypeName) \
TypeName(const TypeName&) = delete; \
void operator=(const TypeName&) = delete
#endif
#ifndef ARROW_DEFAULT_MOVE_AND_ASSIGN
#define ARROW_DEFAULT_MOVE_AND_ASSIGN(TypeName) \
TypeName(TypeName&&) = default; \
TypeName& operator=(TypeName&&) = default
#endif
#define ARROW_UNUSED(x) (void)(x)
#define ARROW_ARG_UNUSED(x)
//
// GCC can be told that a certain branch is not likely to be taken (for
// instance, a CHECK failure), and use that information in static analysis.
// Giving it this information can help it optimize for the common case in
// the absence of better information (ie. -fprofile-arcs).
//
#if defined(__GNUC__)
#define ARROW_PREDICT_FALSE(x) (__builtin_expect(!!(x), 0))
#define ARROW_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#define ARROW_NORETURN __attribute__((noreturn))
#define ARROW_NOINLINE __attribute__((noinline))
#define ARROW_PREFETCH(addr) __builtin_prefetch(addr)
#elif defined(_MSC_VER)
#define ARROW_NORETURN __declspec(noreturn)
#define ARROW_NOINLINE __declspec(noinline)
#define ARROW_PREDICT_FALSE(x) (x)
#define ARROW_PREDICT_TRUE(x) (x)
#define ARROW_PREFETCH(addr)
#else
#define ARROW_NORETURN
#define ARROW_PREDICT_FALSE(x) (x)
#define ARROW_PREDICT_TRUE(x) (x)
#define ARROW_PREFETCH(addr)
#endif
#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
#define ARROW_RESTRICT __restrict
#else
#define ARROW_RESTRICT
#endif
// ----------------------------------------------------------------------
// C++/CLI support macros (see ARROW-1134)
#ifndef NULLPTR
#ifdef __cplusplus_cli
#define NULLPTR __nullptr
#else
#define NULLPTR nullptr
#endif
#endif // ifndef NULLPTR
// ----------------------------------------------------------------------
// clang-format off
// [[deprecated]] is only available in C++14, use this for the time being
// This macro takes an optional deprecation message
#ifdef __COVERITY__
# define ARROW_DEPRECATED(...)
#else
# define ARROW_DEPRECATED(...) [[deprecated(__VA_ARGS__)]]
#endif
#ifdef __COVERITY__
# define ARROW_DEPRECATED_ENUM_VALUE(...)
#else
# define ARROW_DEPRECATED_ENUM_VALUE(...) [[deprecated(__VA_ARGS__)]]
#endif
// clang-format on
// Macros to disable deprecation warnings
#ifdef __clang__
#define ARROW_SUPPRESS_DEPRECATION_WARNING \
_Pragma("clang diagnostic push"); \
_Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"")
#define ARROW_UNSUPPRESS_DEPRECATION_WARNING _Pragma("clang diagnostic pop")
#elif defined(__GNUC__)
#define ARROW_SUPPRESS_DEPRECATION_WARNING \
_Pragma("GCC diagnostic push"); \
_Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
#define ARROW_UNSUPPRESS_DEPRECATION_WARNING _Pragma("GCC diagnostic pop")
#elif defined(_MSC_VER)
#define ARROW_SUPPRESS_DEPRECATION_WARNING \
__pragma(warning(push)) __pragma(warning(disable : 4996))
#define ARROW_UNSUPPRESS_DEPRECATION_WARNING __pragma(warning(pop))
#else
#define ARROW_SUPPRESS_DEPRECATION_WARNING
#define ARROW_UNSUPPRESS_DEPRECATION_WARNING
#endif
// ----------------------------------------------------------------------
// macros to disable padding
// these macros are portable across different compilers and platforms
//[https://github.com/google/flatbuffers/blob/master/include/flatbuffers/flatbuffers.h#L1355]
#if !defined(MANUALLY_ALIGNED_STRUCT)
#if defined(_MSC_VER)
#define MANUALLY_ALIGNED_STRUCT(alignment) \
__pragma(pack(1)); \
struct __declspec(align(alignment))
#define STRUCT_END(name, size) \
__pragma(pack()); \
static_assert(sizeof(name) == size, "compiler breaks packing rules")
#elif defined(__GNUC__) || defined(__clang__)
#define MANUALLY_ALIGNED_STRUCT(alignment) \
_Pragma("pack(1)") struct __attribute__((aligned(alignment)))
#define STRUCT_END(name, size) \
_Pragma("pack()") static_assert(sizeof(name) == size, "compiler breaks packing rules")
#else
#error Unknown compiler, please define structure alignment macros
#endif
#endif // !defined(MANUALLY_ALIGNED_STRUCT)
// ----------------------------------------------------------------------
// Convenience macro disabling a particular UBSan check in a function
#if defined(__clang__)
#define ARROW_DISABLE_UBSAN(feature) __attribute__((no_sanitize(feature)))
#else
#define ARROW_DISABLE_UBSAN(feature)
#endif
// ----------------------------------------------------------------------
// Machine information
#if INTPTR_MAX == INT64_MAX
#define ARROW_BITNESS 64
#elif INTPTR_MAX == INT32_MAX
#define ARROW_BITNESS 32
#else
#error Unexpected INTPTR_MAX
#endif
// ----------------------------------------------------------------------
// From googletest
// (also in parquet-cpp)
// When you need to test the private or protected members of a class,
// use the FRIEND_TEST macro to declare your tests as friends of the
// class. For example:
//
// class MyClass {
// private:
// void MyMethod();
// FRIEND_TEST(MyClassTest, MyMethod);
// };
//
// class MyClassTest : public testing::Test {
// // ...
// };
//
// TEST_F(MyClassTest, MyMethod) {
// // Can call MyClass::MyMethod() here.
// }
#define FRIEND_TEST(test_case_name, test_name) \
friend class test_case_name##_##test_name##_Test
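A brief illustrative sketch (class and function names are hypothetical) combining a few of the macros above: ARROW_DISALLOW_COPY_AND_ASSIGN and ARROW_DEFAULT_MOVE_AND_ASSIGN in a class body, and ARROW_PREDICT_FALSE around a rarely taken error branch.

#include <cstdio>
#include "arrow/util/macros.h"  // assumed include path for this header

class ScratchBuffer {
 public:
  ScratchBuffer() = default;
  ARROW_DEFAULT_MOVE_AND_ASSIGN(ScratchBuffer);

 private:
  ARROW_DISALLOW_COPY_AND_ASSIGN(ScratchBuffer);
};

inline int ParsePositive(int value) {
  if (ARROW_PREDICT_FALSE(value < 0)) {
    // Hint to the compiler that this branch is unlikely.
    std::fprintf(stderr, "negative value %d\n", value);
    return 0;
  }
  return value;
}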

View File

@@ -0,0 +1,63 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <utility>
#include "arrow/result.h"
namespace arrow {
namespace internal {
/// Helper providing single-lookup conditional insertion into std::map or
/// std::unordered_map. If `key` exists in the container, an iterator to that pair
/// will be returned. If `key` does not exist in the container, `gen(key)` will be
/// invoked and its return value inserted.
template <typename Map, typename Gen>
auto GetOrInsertGenerated(Map* map, typename Map::key_type key, Gen&& gen)
-> decltype(map->begin()->second = gen(map->begin()->first), map->begin()) {
decltype(gen(map->begin()->first)) placeholder{};
auto it_success = map->emplace(std::move(key), std::move(placeholder));
if (it_success.second) {
// insertion of placeholder succeeded, overwrite it with gen()
const auto& inserted_key = it_success.first->first;
auto* value = &it_success.first->second;
*value = gen(inserted_key);
}
return it_success.first;
}
template <typename Map, typename Gen>
auto GetOrInsertGenerated(Map* map, typename Map::key_type key, Gen&& gen)
-> Result<decltype(map->begin()->second = gen(map->begin()->first).ValueOrDie(),
map->begin())> {
decltype(gen(map->begin()->first).ValueOrDie()) placeholder{};
auto it_success = map->emplace(std::move(key), std::move(placeholder));
if (it_success.second) {
// insertion of placeholder succeeded, overwrite it with gen()
const auto& inserted_key = it_success.first->first;
auto* value = &it_success.first->second;
ARROW_ASSIGN_OR_RAISE(*value, gen(inserted_key));
}
return it_success.first;
}
} // namespace internal
} // namespace arrow
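A small usage sketch (the map contents, generator, and include path are made up): the generator runs only when the key is absent, so a second lookup for the same key reuses the stored value.

#include <iostream>
#include <string>
#include <unordered_map>
#include "arrow/util/map.h"  // assumed include path for this header

void GetOrInsertGeneratedExample() {
  std::unordered_map<std::string, size_t> lengths;
  auto gen = [](const std::string& key) { return key.size(); };
  // First call inserts gen("hello") == 5; the second call finds the cached entry.
  auto it = arrow::internal::GetOrInsertGenerated(&lengths, "hello", gen);
  auto again = arrow::internal::GetOrInsertGenerated(&lengths, "hello", gen);
  std::cout << it->second << " " << again->second << " " << lengths.size() << std::endl;
}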

View File

@@ -0,0 +1,32 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cmath>
// Not provided by default in MSVC,
// and _USE_MATH_DEFINES is not reliable with unity builds
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
#ifndef M_PI_2
#define M_PI_2 1.57079632679489661923
#endif
#ifndef M_PI_4
#define M_PI_4 0.785398163397448309616
#endif

View File

@@ -0,0 +1,43 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include "arrow/util/macros.h"
namespace arrow {
namespace internal {
// A helper function for doing memcpy with multiple threads. This is required
// to saturate the memory bandwidth of modern CPUs.
void parallel_memcopy(uint8_t* dst, const uint8_t* src, int64_t nbytes,
uintptr_t block_size, int num_threads);
// A helper function for checking if two wrapped objects implementing `Equals`
// are equal.
template <typename T>
bool SharedPtrEquals(const std::shared_ptr<T>& left, const std::shared_ptr<T>& right) {
if (left == right) return true;
if (left == NULLPTR || right == NULLPTR) return false;
return left->Equals(*right);
}
} // namespace internal
} // namespace arrow
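A tiny sketch of SharedPtrEquals with a made-up Equals-bearing type (the include path is assumed): two null pointers compare equal, a null/non-null pair does not, and otherwise Equals() does a deep comparison.

#include <iostream>
#include <memory>
#include "arrow/util/memory.h"  // assumed include path for this header

// Any type exposing bool Equals(const T&) const works; this one is hypothetical.
struct Tag {
  int id;
  bool Equals(const Tag& other) const { return id == other.id; }
};

void SharedPtrEqualsExample() {
  auto a = std::make_shared<Tag>(Tag{7});
  auto b = std::make_shared<Tag>(Tag{7});
  std::shared_ptr<Tag> none;
  std::cout << arrow::internal::SharedPtrEquals(a, b) << std::endl;        // 1
  std::cout << arrow::internal::SharedPtrEquals(a, none) << std::endl;     // 0
  std::cout << arrow::internal::SharedPtrEquals(none, none) << std::endl;  // 1
}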

View File

@@ -0,0 +1,85 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace util {
/// A wrapper around std::mutex since we can't use it directly in
/// public headers due to C++/CLI.
/// https://docs.microsoft.com/en-us/cpp/standard-library/mutex#remarks
class ARROW_EXPORT Mutex {
public:
Mutex();
Mutex(Mutex&&) = default;
Mutex& operator=(Mutex&&) = default;
/// A Guard is falsy if a lock could not be acquired.
class ARROW_EXPORT Guard {
public:
Guard() : locked_(NULLPTR, [](Mutex* mutex) {}) {}
Guard(Guard&&) = default;
Guard& operator=(Guard&&) = default;
explicit operator bool() const { return bool(locked_); }
void Unlock() { locked_.reset(); }
private:
explicit Guard(Mutex* locked);
std::unique_ptr<Mutex, void (*)(Mutex*)> locked_;
friend Mutex;
};
Guard TryLock();
Guard Lock();
private:
struct Impl;
std::unique_ptr<Impl, void (*)(Impl*)> impl_;
};
#ifndef _WIN32
/// Return a pointer to a process-wide, process-specific Mutex that can be used
/// at any point in a child process. NULL is returned when called in the parent.
///
/// The rule is to first check that getpid() corresponds to the parent process pid
/// and, if not, call this function to lock any after-fork reinitialization code.
/// Like this:
///
/// std::atomic<pid_t> pid{getpid()};
/// ...
/// if (pid.load() != getpid()) {
/// // In child process
/// auto lock = GlobalForkSafeMutex()->Lock();
/// if (pid.load() != getpid()) {
/// // Reinitialize internal structures after fork
/// ...
/// pid.store(getpid());
/// }
/// }
ARROW_EXPORT
Mutex* GlobalForkSafeMutex();
#endif
} // namespace util
} // namespace arrow
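A short usage sketch of the Guard-based API above (the guarded counter and function name are illustrative): Lock() blocks until acquired, TryLock() yields a falsy Guard when the mutex is busy, and the lock is released when the Guard is destroyed or Unlock() is called.

#include <iostream>
#include "arrow/util/mutex.h"  // assumed include path for this header

namespace {
arrow::util::Mutex counter_mutex;
int counter = 0;
}  // namespace

void MutexGuardExample() {
  {
    auto guard = counter_mutex.Lock();  // blocks until acquired
    ++counter;
  }  // released when guard goes out of scope
  if (auto guard = counter_mutex.TryLock()) {
    std::cout << "counter = " << counter << std::endl;
    guard.Unlock();  // optional early release
  } else {
    std::cout << "mutex busy, skipping" << std::endl;
  }
}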

View File

@@ -0,0 +1,102 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <utility>
#include <vector>
#include "arrow/status.h"
#include "arrow/util/functional.h"
#include "arrow/util/thread_pool.h"
#include "arrow/util/vector.h"
namespace arrow {
namespace internal {
// A parallelizer that takes a `Status(int)` function and calls it with
// arguments between 0 and `num_tasks - 1`, on an arbitrary number of threads.
template <class FUNCTION>
Status ParallelFor(int num_tasks, FUNCTION&& func,
Executor* executor = internal::GetCpuThreadPool()) {
std::vector<Future<>> futures(num_tasks);
for (int i = 0; i < num_tasks; ++i) {
ARROW_ASSIGN_OR_RAISE(futures[i], executor->Submit(func, i));
}
auto st = Status::OK();
for (auto& fut : futures) {
st &= fut.status();
}
return st;
}
template <class FUNCTION, typename T,
typename R = typename internal::call_traits::return_type<FUNCTION>::ValueType>
Future<std::vector<R>> ParallelForAsync(
std::vector<T> inputs, FUNCTION&& func,
Executor* executor = internal::GetCpuThreadPool()) {
std::vector<Future<R>> futures(inputs.size());
for (size_t i = 0; i < inputs.size(); ++i) {
ARROW_ASSIGN_OR_RAISE(futures[i], executor->Submit(func, i, std::move(inputs[i])));
}
return All(std::move(futures))
.Then([](const std::vector<Result<R>>& results) -> Result<std::vector<R>> {
return UnwrapOrRaise(results);
});
}
// A parallelizer that takes a `Status(int)` function and calls it with
// arguments between 0 and `num_tasks - 1`, in sequence or in parallel,
// depending on the input boolean.
template <class FUNCTION>
Status OptionalParallelFor(bool use_threads, int num_tasks, FUNCTION&& func,
Executor* executor = internal::GetCpuThreadPool()) {
if (use_threads) {
return ParallelFor(num_tasks, std::forward<FUNCTION>(func), executor);
} else {
for (int i = 0; i < num_tasks; ++i) {
RETURN_NOT_OK(func(i));
}
return Status::OK();
}
}
// A parallelizer that takes a `Result<R>(int index, T item)` function and
// calls it with each item from the input array, in sequence or in parallel,
// depending on the input boolean.
template <class FUNCTION, typename T,
typename R = typename internal::call_traits::return_type<FUNCTION>::ValueType>
Future<std::vector<R>> OptionalParallelForAsync(
bool use_threads, std::vector<T> inputs, FUNCTION&& func,
Executor* executor = internal::GetCpuThreadPool()) {
if (use_threads) {
return ParallelForAsync(std::move(inputs), std::forward<FUNCTION>(func), executor);
} else {
std::vector<R> result(inputs.size());
for (size_t i = 0; i < inputs.size(); ++i) {
ARROW_ASSIGN_OR_RAISE(result[i], func(i, inputs[i]));
}
return result;
}
}
} // namespace internal
} // namespace arrow
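A brief sketch of the entry points above (the squaring task and function name are made up): a Status(int) callable is run for indices 0..n-1 on the CPU thread pool, or sequentially when use_threads is false.

#include <vector>
#include "arrow/status.h"
#include "arrow/util/parallel.h"  // assumed include path for this header

arrow::Status SquareAll(std::vector<int>* values, bool use_threads) {
  auto square_one = [values](int i) -> arrow::Status {
    (*values)[i] = (*values)[i] * (*values)[i];  // each task touches a distinct index
    return arrow::Status::OK();
  };
  return arrow::internal::OptionalParallelFor(
      use_threads, static_cast<int>(values->size()), square_one);
}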

View File

@@ -0,0 +1,33 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/vendored/pcg/pcg_random.hpp" // IWYU pragma: export
namespace arrow {
namespace random {
using pcg32 = ::arrow_vendored::pcg32;
using pcg64 = ::arrow_vendored::pcg64;
using pcg32_fast = ::arrow_vendored::pcg32_fast;
using pcg64_fast = ::arrow_vendored::pcg64_fast;
using pcg32_oneseq = ::arrow_vendored::pcg32_oneseq;
using pcg64_oneseq = ::arrow_vendored::pcg64_oneseq;
} // namespace random
} // namespace arrow
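A small sketch using the vendored PCG aliases with the standard <random> distributions (the seed and bounds are arbitrary, the include path assumed); the engines satisfy UniformRandomBitGenerator, so they plug into the standard distributions directly.

#include <iostream>
#include <random>
#include "arrow/util/pcg_random.h"  // assumed include path for this header

void PcgExample() {
  arrow::random::pcg64 rng(/*seed=*/42);
  std::uniform_int_distribution<int> dist(0, 99);
  for (int i = 0; i < 3; ++i) {
    std::cout << dist(rng) << " ";
  }
  std::cout << std::endl;
}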

View File

@@ -0,0 +1,51 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <tuple>
namespace arrow {
namespace internal {
namespace detail {
template <typename OStream, typename Tuple, size_t N>
struct TuplePrinter {
static void Print(OStream* os, const Tuple& t) {
TuplePrinter<OStream, Tuple, N - 1>::Print(os, t);
*os << std::get<N - 1>(t);
}
};
template <typename OStream, typename Tuple>
struct TuplePrinter<OStream, Tuple, 0> {
static void Print(OStream* os, const Tuple& t) {}
};
} // namespace detail
// Print elements from a tuple to a stream, in order.
// Typical use is to pack a bunch of existing values with std::forward_as_tuple()
// before passing it to this function.
template <typename OStream, typename... Args>
void PrintTuple(OStream* os, const std::tuple<Args&...>& tup) {
detail::TuplePrinter<OStream, std::tuple<Args&...>, sizeof...(Args)>::Print(os, tup);
}
} // namespace internal
} // namespace arrow
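A short usage sketch of PrintTuple (the values and include path are illustrative): pack existing lvalues with std::forward_as_tuple and they are streamed in order, with no separator.

#include <iostream>
#include <string>
#include <tuple>
#include "arrow/util/print.h"  // assumed include path for this header

void PrintTupleExample() {
  std::string name = "batch";
  int rows = 128;
  double ratio = 0.75;
  // Prints "batch1280.75".
  arrow::internal::PrintTuple(&std::cout, std::forward_as_tuple(name, rows, ratio));
  std::cout << std::endl;
}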

View File

@@ -0,0 +1,29 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/vendored/ProducerConsumerQueue.h"
namespace arrow {
namespace util {
template <typename T>
using SpscQueue = arrow_vendored::folly::ProducerConsumerQueue<T>;
}
} // namespace arrow
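A minimal single-producer/single-consumer sketch, assuming the usual folly ProducerConsumerQueue interface (a capacity constructor of at least 2, write() returning false when full, read() returning false when empty); the element type, sizes, and include path are arbitrary.

#include <iostream>
#include "arrow/util/queue.h"  // assumed include path for this header

void SpscQueueExample() {
  arrow::util::SpscQueue<int> queue(/*size=*/8);
  for (int i = 0; i < 4; ++i) {
    queue.write(i);  // producer side; returns false if the ring buffer is full
  }
  int value = 0;
  while (queue.read(value)) {  // consumer side; returns false when empty
    std::cout << value << " ";
  }
  std::cout << std::endl;
}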

View File

@@ -0,0 +1,155 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <utility>
#include <vector>
namespace arrow {
namespace internal {
/// Create a vector containing the values from start up to (but not including) stop
template <typename T>
std::vector<T> Iota(T start, T stop) {
if (start > stop) {
return {};
}
std::vector<T> result(static_cast<size_t>(stop - start));
std::iota(result.begin(), result.end(), start);
return result;
}
/// Create a vector containing the values from 0 up to (but not including) length
template <typename T>
std::vector<T> Iota(T length) {
return Iota(static_cast<T>(0), length);
}
/// Create a range from a length and a callable that takes a single index
/// parameter and returns the value of the range at that index.
/// Only iterators obtained from the same range should be compared; the
/// behaviour is generally similar to that of other STL containers.
template <typename Generator>
class LazyRange {
private:
// callable which generates the values
// has to be defined at the beginning of the class for type deduction
const Generator gen_;
// the length of the range
int64_t length_;
#ifdef _MSC_VER
// workaround for VS2010 not supporting decltype properly
// see https://stackoverflow.com/questions/21782846/decltype-for-class-member-function
static Generator gen_static_;
#endif
public:
#ifdef _MSC_VER
using return_type = decltype(gen_static_(0));
#else
using return_type = decltype(gen_(0));
#endif
/// Construct a new range from a callable and length
LazyRange(Generator gen, int64_t length) : gen_(gen), length_(length) {}
// Class of the dependent iterator, created implicitly by begin and end
class RangeIter {
public:
using difference_type = int64_t;
using value_type = return_type;
using reference = const value_type&;
using pointer = const value_type*;
using iterator_category = std::forward_iterator_tag;
#ifdef _MSC_VER
// msvc complains about unchecked iterators,
// see https://stackoverflow.com/questions/21655496/error-c4996-checked-iterators
using _Unchecked_type = typename LazyRange<Generator>::RangeIter;
#endif
RangeIter() = delete;
RangeIter(const RangeIter& other) = default;
RangeIter& operator=(const RangeIter& other) = default;
RangeIter(const LazyRange<Generator>& range, int64_t index)
: range_(&range), index_(index) {}
const return_type operator*() const { return range_->gen_(index_); }
RangeIter operator+(difference_type length) const {
return RangeIter(*range_, index_ + length);
}
// pre-increment
RangeIter& operator++() {
++index_;
return *this;
}
// post-increment
RangeIter operator++(int) {
auto copy = RangeIter(*this);
++index_;
return copy;
}
bool operator==(const typename LazyRange<Generator>::RangeIter& other) const {
return this->index_ == other.index_ && this->range_ == other.range_;
}
bool operator!=(const typename LazyRange<Generator>::RangeIter& other) const {
return this->index_ != other.index_ || this->range_ != other.range_;
}
int64_t operator-(const typename LazyRange<Generator>::RangeIter& other) const {
return this->index_ - other.index_;
}
bool operator<(const typename LazyRange<Generator>::RangeIter& other) const {
return this->index_ < other.index_;
}
private:
// parent range reference
const LazyRange* range_;
// current index
int64_t index_;
};
friend class RangeIter;
// Create a new begin const iterator
RangeIter begin() { return RangeIter(*this, 0); }
// Create a new end const iterator
RangeIter end() { return RangeIter(*this, length_); }
};
/// Helper function to create a lazy range from a callable (e.g. lambda) and length
template <typename Generator>
LazyRange<Generator> MakeLazyRange(Generator&& gen, int64_t length) {
return LazyRange<Generator>(std::forward<Generator>(gen), length);
}
} // namespace internal
} // namespace arrow
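A small usage sketch of Iota and MakeLazyRange (the generator lambda, values, and include path are arbitrary): Iota materializes a vector, while the lazy range computes each element on demand from its index.

#include <cstdint>
#include <iostream>
#include <vector>
#include "arrow/util/range.h"  // assumed include path for this header

void RangeExample() {
  std::vector<int> v = arrow::internal::Iota(3, 7);  // {3, 4, 5, 6}
  auto squares = arrow::internal::MakeLazyRange(
      [](int64_t i) { return i * i; }, /*length=*/5);
  for (auto value : squares) {
    std::cout << value << " ";  // 0 1 4 9 16
  }
  std::cout << std::endl << v.size() << std::endl;  // 4
}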

View File

@@ -0,0 +1,51 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cassert>
#include <initializer_list>
#include <regex>
#include <string_view>
#include <type_traits>
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
/// Match regex against target and produce string_views out of matches.
inline bool RegexMatch(const std::regex& regex, std::string_view target,
std::initializer_list<std::string_view*> out_matches) {
assert(regex.mark_count() == out_matches.size());
std::match_results<decltype(target.begin())> match;
if (!std::regex_match(target.begin(), target.end(), match, regex)) {
return false;
}
// Match #0 is the whole matched sequence
assert(regex.mark_count() + 1 == match.size());
auto out_it = out_matches.begin();
for (size_t i = 1; i < match.size(); ++i) {
**out_it++ = target.substr(match.position(i), match.length(i));
}
return true;
}
} // namespace internal
} // namespace arrow
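A brief sketch of RegexMatch (the pattern and include path are arbitrary): the number of output slots must equal the regex's capture-group count, and the returned views point into the target, which must outlive them.

#include <iostream>
#include <regex>
#include <string_view>
#include "arrow/util/regex.h"  // assumed include path for this header

void RegexMatchExample() {
  const std::regex kKeyValue("(\\w+)=(\\w+)");  // two capture groups
  std::string_view target = "compression=zstd";
  std::string_view key, value;
  if (arrow::internal::RegexMatch(kKeyValue, target, {&key, &value})) {
    std::cout << key << " -> " << value << std::endl;  // compression -> zstd
  }
}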

View File

@@ -0,0 +1,827 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Imported from Apache Impala (incubating) on 2016-01-29 and modified for use
// in parquet-cpp, Arrow
#pragma once
#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/bit_run_reader.h"
#include "arrow/util/bit_stream_utils.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace util {
/// Utility classes to do run length encoding (RLE) for fixed bit width values. If runs
/// are sufficiently long, RLE is used, otherwise, the values are just bit-packed
/// (literal encoding).
/// For both types of runs, there is a byte-aligned indicator which encodes the length
/// of the run and the type of the run.
/// This encoding has the benefit that when there aren't any long enough runs, values
/// are always decoded at fixed (can be precomputed) bit offsets OR both the value and
/// the run length are byte aligned. This allows for very efficient decoding
/// implementations.
/// The encoding is:
/// encoded-block := run*
/// run := literal-run | repeated-run
/// literal-run := literal-indicator < literal bytes >
/// repeated-run := repeated-indicator < repeated value, padded to byte boundary >
/// literal-indicator := varint_encode( number_of_groups << 1 | 1)
/// repeated-indicator := varint_encode( number_of_repetitions << 1 )
//
/// Each run is preceded by a varint. The varint's least significant bit is
/// used to indicate whether the run is a literal run or a repeated run. The rest
/// of the varint is used to determine the length of the run (eg how many times the
/// value repeats).
//
/// In the case of literal runs, the run length is always a multiple of 8 (i.e. encode
/// in groups of 8), so that no matter the bit-width of the value, the sequence will end
/// on a byte boundary without padding.
/// Given that we know it is a multiple of 8, we store the number of 8-groups rather than
/// the actual number of encoded ints. (This means that the total number of encoded values
/// cannot be determined from the encoded data, since the number of values in the last
/// group may not be a multiple of 8). For the last group of literal runs, we pad
/// the group to 8 with zeros. This allows for 8 at a time decoding on the read side
/// without the need for additional checks.
//
/// There is a break-even point at which run length encoding becomes more storage
/// efficient. For 1 bit-width values, that point is 8 values: they require 2 bytes
/// for either the repeated encoding or the literal encoding. This value can always
/// be computed based on the bit-width.
/// TODO: think about how to use this for strings. The bit packing isn't quite the same.
//
/// Examples with bit-width 1 (eg encoding booleans):
/// ----------------------------------------
/// 100 1s followed by 100 0s:
/// <varint(100 << 1)> <1, padded to 1 byte> <varint(100 << 1)> <0, padded to 1 byte>
/// - (total 4 bytes)
//
/// alternating 1s and 0s (200 total):
/// 200 ints = 25 groups of 8
/// <varint((25 << 1) | 1)> <25 bytes of values, bitpacked>
/// (total 26 bytes, 1 byte overhead)
//
/// Decoder class for RLE encoded data.
class RleDecoder {
public:
/// Create a decoder object. buffer/buffer_len is the decoded data.
/// bit_width is the width of each value (before encoding).
RleDecoder(const uint8_t* buffer, int buffer_len, int bit_width)
: bit_reader_(buffer, buffer_len),
bit_width_(bit_width),
current_value_(0),
repeat_count_(0),
literal_count_(0) {
DCHECK_GE(bit_width_, 0);
DCHECK_LE(bit_width_, 64);
}
RleDecoder() : bit_width_(-1) {}
void Reset(const uint8_t* buffer, int buffer_len, int bit_width) {
DCHECK_GE(bit_width, 0);
DCHECK_LE(bit_width, 64);
bit_reader_.Reset(buffer, buffer_len);
bit_width_ = bit_width;
current_value_ = 0;
repeat_count_ = 0;
literal_count_ = 0;
}
/// Gets the next value. Returns false if there are no more.
template <typename T>
bool Get(T* val);
/// Gets a batch of values. Returns the number of decoded elements.
template <typename T>
int GetBatch(T* values, int batch_size);
/// Like GetBatch but add spacing for null entries
template <typename T>
int GetBatchSpaced(int batch_size, int null_count, const uint8_t* valid_bits,
int64_t valid_bits_offset, T* out);
/// Like GetBatch but the values are then decoded using the provided dictionary
template <typename T>
int GetBatchWithDict(const T* dictionary, int32_t dictionary_length, T* values,
int batch_size);
/// Like GetBatchWithDict but add spacing for null entries
///
/// Null entries will be zero-initialized in `values` to avoid leaking
/// private data.
template <typename T>
int GetBatchWithDictSpaced(const T* dictionary, int32_t dictionary_length, T* values,
int batch_size, int null_count, const uint8_t* valid_bits,
int64_t valid_bits_offset);
protected:
::arrow::bit_util::BitReader bit_reader_;
/// Number of bits needed to encode the value. Must be between 0 and 64.
int bit_width_;
uint64_t current_value_;
int32_t repeat_count_;
int32_t literal_count_;
private:
/// Fills literal_count_ and repeat_count_ with next values. Returns false if there
/// are no more.
template <typename T>
bool NextCounts();
/// Utility methods for retrieving spaced values.
template <typename T, typename RunType, typename Converter>
int GetSpaced(Converter converter, int batch_size, int null_count,
const uint8_t* valid_bits, int64_t valid_bits_offset, T* out);
};
/// Class to incrementally build the rle data. This class does not allocate any memory.
/// The encoding has two modes: encoding repeated runs and literal runs.
/// If the run is sufficiently short, it is more efficient to encode as a literal run.
/// This class does so by buffering 8 values at a time. If they are not all the same
/// they are added to the literal run. If they are the same, they are added to the
/// repeated run. When we switch modes, the previous run is flushed out.
class RleEncoder {
public:
/// buffer/buffer_len: preallocated output buffer.
/// bit_width: max number of bits for value.
/// TODO: consider adding a min_repeated_run_length so the caller can control
/// when values should be encoded as repeated runs. Currently this is derived
/// based on the bit_width, which can determine a storage optimal choice.
/// TODO: allow 0 bit_width (and have dict encoder use it)
RleEncoder(uint8_t* buffer, int buffer_len, int bit_width)
: bit_width_(bit_width), bit_writer_(buffer, buffer_len) {
DCHECK_GE(bit_width_, 0);
DCHECK_LE(bit_width_, 64);
max_run_byte_size_ = MinBufferSize(bit_width);
DCHECK_GE(buffer_len, max_run_byte_size_) << "Input buffer not big enough.";
Clear();
}
/// Returns the minimum buffer size needed to use the encoder for 'bit_width'
/// This is the maximum length of a single run for 'bit_width'.
/// It is not valid to pass a buffer less than this length.
static int MinBufferSize(int bit_width) {
/// 1 indicator byte and MAX_VALUES_PER_LITERAL_RUN 'bit_width' values.
int max_literal_run_size = 1 + static_cast<int>(::arrow::bit_util::BytesForBits(
MAX_VALUES_PER_LITERAL_RUN * bit_width));
/// Up to kMaxVlqByteLength indicator and a single 'bit_width' value.
int max_repeated_run_size =
::arrow::bit_util::BitReader::kMaxVlqByteLength +
static_cast<int>(::arrow::bit_util::BytesForBits(bit_width));
return std::max(max_literal_run_size, max_repeated_run_size);
}
/// Returns the maximum byte size it could take to encode 'num_values'.
static int MaxBufferSize(int bit_width, int num_values) {
// For a bit_width > 1, the worst case is the repetition of "literal run of length 8
// and then a repeated run of length 8".
// 8 values per smallest run, 8 bits per byte
int bytes_per_run = bit_width;
int num_runs = static_cast<int>(::arrow::bit_util::CeilDiv(num_values, 8));
int literal_max_size = num_runs + num_runs * bytes_per_run;
// In the very worst case scenario, the data is a concatenation of repeated
// runs of 8 values. Repeated run has a 1 byte varint followed by the
// bit-packed repeated value
int min_repeated_run_size =
1 + static_cast<int>(::arrow::bit_util::BytesForBits(bit_width));
int repeated_max_size = static_cast<int>(::arrow::bit_util::CeilDiv(num_values, 8)) *
min_repeated_run_size;
return std::max(literal_max_size, repeated_max_size);
}
/// Encode value. Returns true if the value fits in buffer, false otherwise.
/// This value must be representable with bit_width_ bits.
bool Put(uint64_t value);
/// Flushes any pending values to the underlying buffer.
/// Returns the total number of bytes written
int Flush();
/// Resets all the state in the encoder.
void Clear();
/// Returns pointer to underlying buffer
uint8_t* buffer() { return bit_writer_.buffer(); }
int32_t len() { return bit_writer_.bytes_written(); }
private:
/// Flushes any buffered values. If this is part of a repeated run, this is largely
/// a no-op.
/// If it is part of a literal run, this will call FlushLiteralRun, which writes
/// out the buffered literal values.
/// If 'done' is true, the current run is written out even if it would normally
/// have been buffered further. This should only be called at the end, once the
/// encoder has received all values.
void FlushBufferedValues(bool done);
/// Flushes literal values to the underlying buffer. If update_indicator_byte,
/// then the current literal run is complete and the indicator byte is updated.
void FlushLiteralRun(bool update_indicator_byte);
/// Flushes a repeated run to the underlying buffer.
void FlushRepeatedRun();
/// Checks and sets buffer_full_. This must be called after flushing a run to
/// make sure there are enough bytes remaining to encode the next run.
void CheckBufferFull();
/// The maximum number of values in a single literal run
/// (number of groups encodable by a 1-byte indicator * 8)
static const int MAX_VALUES_PER_LITERAL_RUN = (1 << 6) * 8;
/// Number of bits needed to encode the value. Must be between 0 and 64.
const int bit_width_;
/// Underlying buffer.
::arrow::bit_util::BitWriter bit_writer_;
/// If true, the buffer is full and subsequent Put()'s will fail.
bool buffer_full_;
/// The maximum byte size a single run can take.
int max_run_byte_size_;
/// We need to buffer at most 8 values for literals. This happens when the
/// bit_width is 1 (so 8 values fit in one byte).
/// TODO: generalize this to other bit widths
int64_t buffered_values_[8];
/// Number of values in buffered_values_
int num_buffered_values_;
/// The current (also last) value that was written and the count of how
/// many times in a row that value has been seen. This is maintained even
/// if we are in a literal run. If the repeat_count_ get high enough, we switch
/// to encoding repeated runs.
uint64_t current_value_;
int repeat_count_;
/// Number of literals in the current run. This does not include the literals
/// that might be in buffered_values_. Only after we've got a group big enough
/// can we decide if they should be part of the literal_count_ or repeat_count_.
int literal_count_;
/// Pointer to a byte in the underlying buffer that stores the indicator byte.
/// This is reserved as soon as we need a literal run but the value is written
/// when the literal run is complete.
uint8_t* literal_indicator_byte_;
};
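// A usage sketch added for illustration (not part of the original header; the function
// name, bit width, and values are arbitrary): round-trip a short sequence through
// RleEncoder and RleDecoder with minimal error handling.
inline int RleRoundTripExampleSketch() {
  constexpr int kBitWidth = 3;  // each value fits in 3 bits (0..7)
  std::vector<uint8_t> buffer(RleEncoder::MinBufferSize(kBitWidth));
  RleEncoder encoder(buffer.data(), static_cast<int>(buffer.size()), kBitWidth);
  const uint64_t input[] = {5, 5, 5, 5, 1, 2, 3, 4};
  for (uint64_t v : input) {
    if (!encoder.Put(v)) return -1;  // out of buffer space
  }
  int encoded_len = encoder.Flush();
  RleDecoder decoder(buffer.data(), encoded_len, kBitWidth);
  uint64_t decoded[8] = {};
  return decoder.GetBatch(decoded, 8);  // 8 values decoded on success
}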
template <typename T>
inline bool RleDecoder::Get(T* val) {
return GetBatch(val, 1) == 1;
}
template <typename T>
inline int RleDecoder::GetBatch(T* values, int batch_size) {
DCHECK_GE(bit_width_, 0);
int values_read = 0;
auto* out = values;
while (values_read < batch_size) {
int remaining = batch_size - values_read;
if (repeat_count_ > 0) { // Repeated value case.
int repeat_batch = std::min(remaining, repeat_count_);
std::fill(out, out + repeat_batch, static_cast<T>(current_value_));
repeat_count_ -= repeat_batch;
values_read += repeat_batch;
out += repeat_batch;
} else if (literal_count_ > 0) {
int literal_batch = std::min(remaining, literal_count_);
int actual_read = bit_reader_.GetBatch(bit_width_, out, literal_batch);
if (actual_read != literal_batch) {
return values_read;
}
literal_count_ -= literal_batch;
values_read += literal_batch;
out += literal_batch;
} else {
if (!NextCounts<T>()) return values_read;
}
}
return values_read;
}
template <typename T, typename RunType, typename Converter>
inline int RleDecoder::GetSpaced(Converter converter, int batch_size, int null_count,
const uint8_t* valid_bits, int64_t valid_bits_offset,
T* out) {
if (ARROW_PREDICT_FALSE(null_count == batch_size)) {
converter.FillZero(out, out + batch_size);
return batch_size;
}
DCHECK_GE(bit_width_, 0);
int values_read = 0;
int values_remaining = batch_size - null_count;
// Assume no bits to start.
arrow::internal::BitRunReader bit_reader(valid_bits, valid_bits_offset,
/*length=*/batch_size);
arrow::internal::BitRun valid_run = bit_reader.NextRun();
while (values_read < batch_size) {
if (ARROW_PREDICT_FALSE(valid_run.length == 0)) {
valid_run = bit_reader.NextRun();
}
DCHECK_GT(batch_size, 0);
DCHECK_GT(valid_run.length, 0);
if (valid_run.set) {
if ((repeat_count_ == 0) && (literal_count_ == 0)) {
if (!NextCounts<RunType>()) return values_read;
DCHECK((repeat_count_ > 0) ^ (literal_count_ > 0));
}
if (repeat_count_ > 0) {
int repeat_batch = 0;
// Consume the entire repeat counts incrementing repeat_batch to
// be the total of nulls + values consumed, we only need to
// get the total count because we can fill in the same value for
// nulls and non-nulls. This proves to be a big efficiency win.
while (repeat_count_ > 0 && (values_read + repeat_batch) < batch_size) {
DCHECK_GT(valid_run.length, 0);
if (valid_run.set) {
int update_size = std::min(static_cast<int>(valid_run.length), repeat_count_);
repeat_count_ -= update_size;
repeat_batch += update_size;
valid_run.length -= update_size;
values_remaining -= update_size;
} else {
// We can consume all nulls here because we would do so on
// the next loop anyways.
repeat_batch += static_cast<int>(valid_run.length);
valid_run.length = 0;
}
if (valid_run.length == 0) {
valid_run = bit_reader.NextRun();
}
}
RunType current_value = static_cast<RunType>(current_value_);
if (ARROW_PREDICT_FALSE(!converter.IsValid(current_value))) {
return values_read;
}
converter.Fill(out, out + repeat_batch, current_value);
out += repeat_batch;
values_read += repeat_batch;
} else if (literal_count_ > 0) {
int literal_batch = std::min(values_remaining, literal_count_);
DCHECK_GT(literal_batch, 0);
// Decode the literals
constexpr int kBufferSize = 1024;
RunType indices[kBufferSize];
literal_batch = std::min(literal_batch, kBufferSize);
int actual_read = bit_reader_.GetBatch(bit_width_, indices, literal_batch);
if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) {
return values_read;
}
if (!converter.IsValid(indices, /*length=*/actual_read)) {
return values_read;
}
int skipped = 0;
int literals_read = 0;
while (literals_read < literal_batch) {
if (valid_run.set) {
int update_size = std::min(literal_batch - literals_read,
static_cast<int>(valid_run.length));
converter.Copy(out, indices + literals_read, update_size);
literals_read += update_size;
out += update_size;
valid_run.length -= update_size;
} else {
converter.FillZero(out, out + valid_run.length);
out += valid_run.length;
skipped += static_cast<int>(valid_run.length);
valid_run.length = 0;
}
if (valid_run.length == 0) {
valid_run = bit_reader.NextRun();
}
}
literal_count_ -= literal_batch;
values_remaining -= literal_batch;
values_read += literal_batch + skipped;
}
} else {
converter.FillZero(out, out + valid_run.length);
out += valid_run.length;
values_read += static_cast<int>(valid_run.length);
valid_run.length = 0;
}
}
DCHECK_EQ(valid_run.length, 0);
DCHECK_EQ(values_remaining, 0);
return values_read;
}
// Converter for GetSpaced that handles runs that get returned
// directly as output.
template <typename T>
struct PlainRleConverter {
T kZero = {};
inline bool IsValid(const T& values) const { return true; }
inline bool IsValid(const T* values, int32_t length) const { return true; }
inline void Fill(T* begin, T* end, const T& run_value) const {
std::fill(begin, end, run_value);
}
inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); }
inline void Copy(T* out, const T* values, int length) const {
std::memcpy(out, values, length * sizeof(T));
}
};
template <typename T>
inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count,
const uint8_t* valid_bits,
int64_t valid_bits_offset, T* out) {
if (null_count == 0) {
return GetBatch<T>(out, batch_size);
}
PlainRleConverter<T> converter;
arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset,
batch_size);
int total_processed = 0;
int processed = 0;
arrow::internal::BitBlockCount block;
do {
block = block_counter.NextFourWords();
if (block.length == 0) {
break;
}
if (block.AllSet()) {
processed = GetBatch<T>(out, block.length);
} else if (block.NoneSet()) {
converter.FillZero(out, out + block.length);
processed = block.length;
} else {
processed = GetSpaced<T, /*RunType=*/T, PlainRleConverter<T>>(
converter, block.length, block.length - block.popcount, valid_bits,
valid_bits_offset, out);
}
total_processed += processed;
out += block.length;
valid_bits_offset += block.length;
} while (processed == block.length);
return total_processed;
}
static inline bool IndexInRange(int32_t idx, int32_t dictionary_length) {
return idx >= 0 && idx < dictionary_length;
}
// Converter for GetSpaced that handles runs of returned dictionary
// indices.
template <typename T>
struct DictionaryConverter {
T kZero = {};
const T* dictionary;
int32_t dictionary_length;
inline bool IsValid(int32_t value) { return IndexInRange(value, dictionary_length); }
inline bool IsValid(const int32_t* values, int32_t length) const {
using IndexType = int32_t;
IndexType min_index = std::numeric_limits<IndexType>::max();
IndexType max_index = std::numeric_limits<IndexType>::min();
for (int x = 0; x < length; x++) {
min_index = std::min(values[x], min_index);
max_index = std::max(values[x], max_index);
}
return IndexInRange(min_index, dictionary_length) &&
IndexInRange(max_index, dictionary_length);
}
inline void Fill(T* begin, T* end, const int32_t& run_value) const {
std::fill(begin, end, dictionary[run_value]);
}
inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); }
inline void Copy(T* out, const int32_t* values, int length) const {
for (int x = 0; x < length; x++) {
out[x] = dictionary[values[x]];
}
}
};
template <typename T>
inline int RleDecoder::GetBatchWithDict(const T* dictionary, int32_t dictionary_length,
T* values, int batch_size) {
// Per https://github.com/apache/parquet-format/blob/master/Encodings.md,
// the maximum dictionary index width in Parquet is 32 bits.
using IndexType = int32_t;
DictionaryConverter<T> converter;
converter.dictionary = dictionary;
converter.dictionary_length = dictionary_length;
DCHECK_GE(bit_width_, 0);
int values_read = 0;
auto* out = values;
while (values_read < batch_size) {
int remaining = batch_size - values_read;
if (repeat_count_ > 0) {
auto idx = static_cast<IndexType>(current_value_);
if (ARROW_PREDICT_FALSE(!IndexInRange(idx, dictionary_length))) {
return values_read;
}
T val = dictionary[idx];
int repeat_batch = std::min(remaining, repeat_count_);
std::fill(out, out + repeat_batch, val);
/* Upkeep counters */
repeat_count_ -= repeat_batch;
values_read += repeat_batch;
out += repeat_batch;
} else if (literal_count_ > 0) {
constexpr int kBufferSize = 1024;
IndexType indices[kBufferSize];
int literal_batch = std::min(remaining, literal_count_);
literal_batch = std::min(literal_batch, kBufferSize);
int actual_read = bit_reader_.GetBatch(bit_width_, indices, literal_batch);
if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) {
return values_read;
}
if (ARROW_PREDICT_FALSE(!converter.IsValid(indices, /*length=*/literal_batch))) {
return values_read;
}
converter.Copy(out, indices, literal_batch);
/* Upkeep counters */
literal_count_ -= literal_batch;
values_read += literal_batch;
out += literal_batch;
} else {
if (!NextCounts<IndexType>()) return values_read;
}
}
return values_read;
}
template <typename T>
inline int RleDecoder::GetBatchWithDictSpaced(const T* dictionary,
int32_t dictionary_length, T* out,
int batch_size, int null_count,
const uint8_t* valid_bits,
int64_t valid_bits_offset) {
if (null_count == 0) {
return GetBatchWithDict<T>(dictionary, dictionary_length, out, batch_size);
}
arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset,
batch_size);
using IndexType = int32_t;
DictionaryConverter<T> converter;
converter.dictionary = dictionary;
converter.dictionary_length = dictionary_length;
int total_processed = 0;
int processed = 0;
arrow::internal::BitBlockCount block;
do {
block = block_counter.NextFourWords();
if (block.length == 0) {
break;
}
if (block.AllSet()) {
processed = GetBatchWithDict<T>(dictionary, dictionary_length, out, block.length);
} else if (block.NoneSet()) {
converter.FillZero(out, out + block.length);
processed = block.length;
} else {
processed = GetSpaced<T, /*RunType=*/IndexType, DictionaryConverter<T>>(
converter, block.length, block.length - block.popcount, valid_bits,
valid_bits_offset, out);
}
total_processed += processed;
out += block.length;
valid_bits_offset += block.length;
} while (processed == block.length);
return total_processed;
}
template <typename T>
bool RleDecoder::NextCounts() {
// Read the next run's indicator int, it could be a literal or repeated run.
// The int is encoded as a vlq-encoded value.
uint32_t indicator_value = 0;
if (!bit_reader_.GetVlqInt(&indicator_value)) return false;
// lsb indicates if it is a literal run or repeated run
bool is_literal = indicator_value & 1;
uint32_t count = indicator_value >> 1;
if (is_literal) {
if (ARROW_PREDICT_FALSE(count == 0 || count > static_cast<uint32_t>(INT32_MAX) / 8)) {
return false;
}
literal_count_ = count * 8;
} else {
if (ARROW_PREDICT_FALSE(count == 0 || count > static_cast<uint32_t>(INT32_MAX))) {
return false;
}
repeat_count_ = count;
T value = {};
if (!bit_reader_.GetAligned<T>(
static_cast<int>(::arrow::bit_util::CeilDiv(bit_width_, 8)), &value)) {
return false;
}
current_value_ = static_cast<uint64_t>(value);
}
return true;
}
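// Worked example of the indicator encoding above (illustrative values only):
// an indicator of 13 (0b1101) has lsb 1, so it announces a literal run of
// (13 >> 1) * 8 = 48 bit-packed values, while an indicator of 8 (0b1000) has
// lsb 0, so it announces a repeated run of 8 >> 1 = 4 copies of the single
// value that follows in CeilDiv(bit_width_, 8) bytes.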
/// This function buffers input values 8 at a time. After seeing all 8 values,
/// it decides whether they should be encoded as a literal or repeated run.
inline bool RleEncoder::Put(uint64_t value) {
DCHECK(bit_width_ == 64 || value < (1ULL << bit_width_));
if (ARROW_PREDICT_FALSE(buffer_full_)) return false;
if (ARROW_PREDICT_TRUE(current_value_ == value)) {
++repeat_count_;
if (repeat_count_ > 8) {
// This is just a continuation of the current run, no need to buffer the
// values.
// Note that this is the fast path for long repeated runs.
return true;
}
} else {
if (repeat_count_ >= 8) {
// We had a run that was long enough but it has ended. Flush the
// current repeated run.
DCHECK_EQ(literal_count_, 0);
FlushRepeatedRun();
}
repeat_count_ = 1;
current_value_ = value;
}
buffered_values_[num_buffered_values_] = value;
if (++num_buffered_values_ == 8) {
DCHECK_EQ(literal_count_ % 8, 0);
FlushBufferedValues(false);
}
return true;
}
inline void RleEncoder::FlushLiteralRun(bool update_indicator_byte) {
if (literal_indicator_byte_ == NULL) {
// The literal indicator byte has not been reserved yet, get one now.
literal_indicator_byte_ = bit_writer_.GetNextBytePtr();
DCHECK(literal_indicator_byte_ != NULL);
}
// Write all the buffered values as bit packed literals
for (int i = 0; i < num_buffered_values_; ++i) {
bool success = bit_writer_.PutValue(buffered_values_[i], bit_width_);
DCHECK(success) << "There is a bug in using CheckBufferFull()";
}
num_buffered_values_ = 0;
if (update_indicator_byte) {
// At this point we need to write the indicator byte for the literal run.
// We only reserve one byte, to allow for streaming writes of literal values.
// The logic makes sure we flush literal runs often enough to not overrun
// the 1 byte.
DCHECK_EQ(literal_count_ % 8, 0);
int num_groups = literal_count_ / 8;
int32_t indicator_value = (num_groups << 1) | 1;
DCHECK_EQ(indicator_value & 0xFFFFFF00, 0);
*literal_indicator_byte_ = static_cast<uint8_t>(indicator_value);
literal_indicator_byte_ = NULL;
literal_count_ = 0;
CheckBufferFull();
}
}
inline void RleEncoder::FlushRepeatedRun() {
DCHECK_GT(repeat_count_, 0);
bool result = true;
// The lsb of 0 indicates this is a repeated run
int32_t indicator_value = repeat_count_ << 1 | 0;
result &= bit_writer_.PutVlqInt(static_cast<uint32_t>(indicator_value));
result &= bit_writer_.PutAligned(
current_value_, static_cast<int>(::arrow::bit_util::CeilDiv(bit_width_, 8)));
DCHECK(result);
num_buffered_values_ = 0;
repeat_count_ = 0;
CheckBufferFull();
}
/// Flush the values that have been buffered. At this point we decide whether
/// we need to switch between the run types or continue the current one.
inline void RleEncoder::FlushBufferedValues(bool done) {
if (repeat_count_ >= 8) {
// Clear the buffered values. They are part of the repeated run now and we
// don't want to flush them out as literals.
num_buffered_values_ = 0;
if (literal_count_ != 0) {
// There was a current literal run. All the values in it have been flushed
// but we still need to update the indicator byte.
DCHECK_EQ(literal_count_ % 8, 0);
DCHECK_EQ(repeat_count_, 8);
FlushLiteralRun(true);
}
DCHECK_EQ(literal_count_, 0);
return;
}
literal_count_ += num_buffered_values_;
DCHECK_EQ(literal_count_ % 8, 0);
int num_groups = literal_count_ / 8;
if (num_groups + 1 >= (1 << 6)) {
// We need to start a new literal run because the indicator byte we've reserved
// cannot store more values.
DCHECK(literal_indicator_byte_ != NULL);
FlushLiteralRun(true);
} else {
FlushLiteralRun(done);
}
repeat_count_ = 0;
}
inline int RleEncoder::Flush() {
if (literal_count_ > 0 || repeat_count_ > 0 || num_buffered_values_ > 0) {
bool all_repeat = literal_count_ == 0 && (repeat_count_ == num_buffered_values_ ||
num_buffered_values_ == 0);
// There is something pending, figure out if it's a repeated or literal run
if (repeat_count_ > 0 && all_repeat) {
FlushRepeatedRun();
} else {
DCHECK_EQ(literal_count_ % 8, 0);
// Buffer the last group of literals to 8 by padding with 0s.
for (; num_buffered_values_ != 0 && num_buffered_values_ < 8;
++num_buffered_values_) {
buffered_values_[num_buffered_values_] = 0;
}
literal_count_ += num_buffered_values_;
FlushLiteralRun(true);
repeat_count_ = 0;
}
}
bit_writer_.Flush();
DCHECK_EQ(num_buffered_values_, 0);
DCHECK_EQ(literal_count_, 0);
DCHECK_EQ(repeat_count_, 0);
return bit_writer_.bytes_written();
}
inline void RleEncoder::CheckBufferFull() {
int bytes_written = bit_writer_.bytes_written();
if (bytes_written + max_run_byte_size_ > bit_writer_.buffer_len()) {
buffer_full_ = true;
}
}
inline void RleEncoder::Clear() {
buffer_full_ = false;
current_value_ = 0;
repeat_count_ = 0;
num_buffered_values_ = 0;
literal_count_ = 0;
literal_indicator_byte_ = NULL;
bit_writer_.Clear();
}
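// Usage sketch (illustrative only; it assumes the RleEncoder/RleDecoder
// constructors declared earlier in this file take a buffer pointer, the buffer
// length in bytes and the value bit width):
//
//   uint8_t buffer[64];
//   RleEncoder encoder(buffer, sizeof(buffer), /*bit_width=*/3);
//   for (int i = 0; i < 100; ++i) encoder.Put(5);  // one long repeated run
//   int encoded_len = encoder.Flush();             // bytes written to buffer
//
//   RleDecoder decoder(buffer, encoded_len, /*bit_width=*/3);
//   int out[100];
//   int n = decoder.GetBatch(out, 100);            // n == 100, every value is 5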
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,44 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#ifdef _MSC_VER
// MSVC x86_64/arm64
#if defined(_M_AMD64) || defined(_M_X64)
#include <intrin.h>
#endif
#else
// gcc/clang (possibly others)
#if defined(ARROW_HAVE_BMI2)
#include <x86intrin.h>
#endif
#if defined(ARROW_HAVE_AVX2) || defined(ARROW_HAVE_AVX512)
#include <immintrin.h>
#elif defined(ARROW_HAVE_SSE4_2)
#include <nmmintrin.h>
#endif
#ifdef ARROW_HAVE_NEON
#include <arm_neon.h>
#endif
#endif

View File

@@ -0,0 +1,511 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <initializer_list>
#include <iterator>
#include <limits>
#include <new>
#include <type_traits>
#include <utility>
#include "arrow/util/aligned_storage.h"
#include "arrow/util/macros.h"
namespace arrow {
namespace internal {
template <typename T, size_t N, bool NonTrivialDestructor>
struct StaticVectorStorageBase {
using storage_type = AlignedStorage<T>;
storage_type static_data_[N];
size_t size_ = 0;
void destroy() noexcept {}
};
template <typename T, size_t N>
struct StaticVectorStorageBase<T, N, true> {
using storage_type = AlignedStorage<T>;
storage_type static_data_[N];
size_t size_ = 0;
~StaticVectorStorageBase() noexcept { destroy(); }
void destroy() noexcept { storage_type::destroy_several(static_data_, size_); }
};
template <typename T, size_t N, bool D = !std::is_trivially_destructible<T>::value>
struct StaticVectorStorage : public StaticVectorStorageBase<T, N, D> {
using Base = StaticVectorStorageBase<T, N, D>;
using typename Base::storage_type;
using Base::size_;
using Base::static_data_;
StaticVectorStorage() noexcept = default;
constexpr storage_type* storage_ptr() { return static_data_; }
constexpr const storage_type* const_storage_ptr() const { return static_data_; }
// Adjust storage size, but don't initialize any objects
void bump_size(size_t addend) {
assert(size_ + addend <= N);
size_ += addend;
}
void ensure_capacity(size_t min_capacity) { assert(min_capacity <= N); }
// Adjust storage size, but don't destroy any objects
void reduce_size(size_t reduce_by) {
assert(reduce_by <= size_);
size_ -= reduce_by;
}
// Move objects from another storage, but don't destroy any objects currently
// stored in *this.
// You need to call destroy() first if necessary (e.g. in a
// move assignment operator).
void move_construct(StaticVectorStorage&& other) noexcept {
size_ = other.size_;
if (size_ != 0) {
// Use a compile-time memcpy size (N) for trivial types
storage_type::move_construct_several(other.static_data_, static_data_, size_, N);
}
}
constexpr size_t capacity() const { return N; }
constexpr size_t max_size() const { return N; }
void reserve(size_t n) {}
void clear() {
storage_type::destroy_several(static_data_, size_);
size_ = 0;
}
};
template <typename T, size_t N>
struct SmallVectorStorage {
using storage_type = AlignedStorage<T>;
storage_type static_data_[N];
size_t size_ = 0;
storage_type* data_ = static_data_;
size_t dynamic_capacity_ = 0;
SmallVectorStorage() noexcept = default;
~SmallVectorStorage() { destroy(); }
constexpr storage_type* storage_ptr() { return data_; }
constexpr const storage_type* const_storage_ptr() const { return data_; }
void bump_size(size_t addend) {
const size_t new_size = size_ + addend;
ensure_capacity(new_size);
size_ = new_size;
}
void ensure_capacity(size_t min_capacity) {
if (dynamic_capacity_) {
// Grow dynamic storage if necessary
if (min_capacity > dynamic_capacity_) {
size_t new_capacity = std::max(dynamic_capacity_ * 2, min_capacity);
reallocate_dynamic(new_capacity);
}
} else if (min_capacity > N) {
switch_to_dynamic(min_capacity);
}
}
void reduce_size(size_t reduce_by) {
assert(reduce_by <= size_);
size_ -= reduce_by;
}
void destroy() noexcept {
storage_type::destroy_several(data_, size_);
if (dynamic_capacity_) {
delete[] data_;
}
}
void move_construct(SmallVectorStorage&& other) noexcept {
size_ = other.size_;
dynamic_capacity_ = other.dynamic_capacity_;
if (dynamic_capacity_) {
data_ = other.data_;
other.data_ = other.static_data_;
other.dynamic_capacity_ = 0;
other.size_ = 0;
} else if (size_ != 0) {
// Use a compile-time memcpy size (N) for trivial types
storage_type::move_construct_several(other.static_data_, static_data_, size_, N);
}
}
constexpr size_t capacity() const { return dynamic_capacity_ ? dynamic_capacity_ : N; }
constexpr size_t max_size() const { return std::numeric_limits<size_t>::max(); }
void reserve(size_t n) {
if (dynamic_capacity_) {
if (n > dynamic_capacity_) {
reallocate_dynamic(n);
}
} else if (n > N) {
switch_to_dynamic(n);
}
}
void clear() {
storage_type::destroy_several(data_, size_);
size_ = 0;
}
private:
void switch_to_dynamic(size_t new_capacity) {
dynamic_capacity_ = new_capacity;
data_ = new storage_type[new_capacity];
storage_type::move_construct_several_and_destroy_source(static_data_, data_, size_);
}
void reallocate_dynamic(size_t new_capacity) {
assert(new_capacity >= size_);
auto new_data = new storage_type[new_capacity];
storage_type::move_construct_several_and_destroy_source(data_, new_data, size_);
delete[] data_;
dynamic_capacity_ = new_capacity;
data_ = new_data;
}
};
template <typename T, size_t N, typename Storage>
class StaticVectorImpl {
private:
Storage storage_;
T* data_ptr() { return storage_.storage_ptr()->get(); }
constexpr const T* const_data_ptr() const {
return storage_.const_storage_ptr()->get();
}
public:
using size_type = size_t;
using difference_type = ptrdiff_t;
using value_type = T;
using pointer = T*;
using const_pointer = const T*;
using reference = T&;
using const_reference = const T&;
using iterator = T*;
using const_iterator = const T*;
using reverse_iterator = std::reverse_iterator<iterator>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
constexpr StaticVectorImpl() noexcept = default;
// Move and copy constructors
StaticVectorImpl(StaticVectorImpl&& other) noexcept {
storage_.move_construct(std::move(other.storage_));
}
StaticVectorImpl& operator=(StaticVectorImpl&& other) noexcept {
if (ARROW_PREDICT_TRUE(&other != this)) {
// TODO move_assign?
storage_.destroy();
storage_.move_construct(std::move(other.storage_));
}
return *this;
}
StaticVectorImpl(const StaticVectorImpl& other) {
init_by_copying(other.storage_.size_, other.const_data_ptr());
}
StaticVectorImpl& operator=(const StaticVectorImpl& other) noexcept {
if (ARROW_PREDICT_TRUE(&other != this)) {
assign_by_copying(other.storage_.size_, other.data());
}
return *this;
}
// Automatic conversion from std::vector<T>, for convenience
StaticVectorImpl(const std::vector<T>& other) { // NOLINT: explicit
init_by_copying(other.size(), other.data());
}
StaticVectorImpl(std::vector<T>&& other) noexcept { // NOLINT: explicit
init_by_moving(other.size(), other.data());
}
StaticVectorImpl& operator=(const std::vector<T>& other) {
assign_by_copying(other.size(), other.data());
return *this;
}
StaticVectorImpl& operator=(std::vector<T>&& other) noexcept {
assign_by_moving(other.size(), other.data());
return *this;
}
// Constructing from count and optional initialization value
explicit StaticVectorImpl(size_t count) {
storage_.bump_size(count);
auto* p = storage_.storage_ptr();
for (size_t i = 0; i < count; ++i) {
p[i].construct();
}
}
StaticVectorImpl(size_t count, const T& value) {
storage_.bump_size(count);
auto* p = storage_.storage_ptr();
for (size_t i = 0; i < count; ++i) {
p[i].construct(value);
}
}
StaticVectorImpl(std::initializer_list<T> values) {
storage_.bump_size(values.size());
auto* p = storage_.storage_ptr();
for (auto&& v : values) {
// Unfortunately, cannot move initializer values
p++->construct(v);
}
}
// Size inspection
constexpr bool empty() const { return storage_.size_ == 0; }
constexpr size_t size() const { return storage_.size_; }
constexpr size_t capacity() const { return storage_.capacity(); }
constexpr size_t max_size() const { return storage_.max_size(); }
// Data access
T& operator[](size_t i) { return data_ptr()[i]; }
constexpr const T& operator[](size_t i) const { return const_data_ptr()[i]; }
T& front() { return data_ptr()[0]; }
constexpr const T& front() const { return const_data_ptr()[0]; }
T& back() { return data_ptr()[storage_.size_ - 1]; }
constexpr const T& back() const { return const_data_ptr()[storage_.size_ - 1]; }
T* data() { return data_ptr(); }
constexpr const T* data() const { return const_data_ptr(); }
// Iterators
iterator begin() { return iterator(data_ptr()); }
constexpr const_iterator begin() const { return const_iterator(const_data_ptr()); }
constexpr const_iterator cbegin() const { return const_iterator(const_data_ptr()); }
iterator end() { return iterator(data_ptr() + storage_.size_); }
constexpr const_iterator end() const {
return const_iterator(const_data_ptr() + storage_.size_);
}
constexpr const_iterator cend() const {
return const_iterator(const_data_ptr() + storage_.size_);
}
reverse_iterator rbegin() { return reverse_iterator(end()); }
constexpr const_reverse_iterator rbegin() const {
return const_reverse_iterator(end());
}
constexpr const_reverse_iterator crbegin() const {
return const_reverse_iterator(end());
}
reverse_iterator rend() { return reverse_iterator(begin()); }
constexpr const_reverse_iterator rend() const {
return const_reverse_iterator(begin());
}
constexpr const_reverse_iterator crend() const {
return const_reverse_iterator(begin());
}
// Mutations
void reserve(size_t n) { storage_.reserve(n); }
void clear() { storage_.clear(); }
void push_back(const T& value) {
storage_.bump_size(1);
storage_.storage_ptr()[storage_.size_ - 1].construct(value);
}
void push_back(T&& value) {
storage_.bump_size(1);
storage_.storage_ptr()[storage_.size_ - 1].construct(std::move(value));
}
template <typename... Args>
void emplace_back(Args&&... args) {
storage_.bump_size(1);
storage_.storage_ptr()[storage_.size_ - 1].construct(std::forward<Args>(args)...);
}
template <typename InputIt>
iterator insert(const_iterator insert_at, InputIt first, InputIt last) {
const size_t n = storage_.size_;
const size_t it_size = static_cast<size_t>(last - first); // XXX might be O(n)?
const size_t pos = static_cast<size_t>(insert_at - const_data_ptr());
storage_.bump_size(it_size);
auto* p = storage_.storage_ptr();
if (it_size == 0) {
return p[pos].get();
}
const size_t end_pos = pos + it_size;
// Move [pos; n) to [end_pos; end_pos + n - pos)
size_t i = n;
size_t j = end_pos + n - pos;
while (j > std::max(n, end_pos)) {
p[--j].move_construct(&p[--i]);
}
while (j > end_pos) {
p[--j].move_assign(&p[--i]);
}
assert(j == end_pos);
// Copy [first; last) to [pos; end_pos)
j = pos;
while (j < std::min(n, end_pos)) {
p[j++].assign(*first++);
}
while (j < end_pos) {
p[j++].construct(*first++);
}
assert(first == last);
return p[pos].get();
}
void resize(size_t n) {
const size_t old_size = storage_.size_;
if (n > storage_.size_) {
storage_.bump_size(n - old_size);
auto* p = storage_.storage_ptr();
for (size_t i = old_size; i < n; ++i) {
p[i].construct(T{});
}
} else {
auto* p = storage_.storage_ptr();
for (size_t i = n; i < old_size; ++i) {
p[i].destroy();
}
storage_.reduce_size(old_size - n);
}
}
void resize(size_t n, const T& value) {
const size_t old_size = storage_.size_;
if (n > storage_.size_) {
storage_.bump_size(n - old_size);
auto* p = storage_.storage_ptr();
for (size_t i = old_size; i < n; ++i) {
p[i].construct(value);
}
} else {
auto* p = storage_.storage_ptr();
for (size_t i = n; i < old_size; ++i) {
p[i].destroy();
}
storage_.reduce_size(old_size - n);
}
}
private:
template <typename InputIt>
void init_by_copying(size_t n, InputIt src) {
storage_.bump_size(n);
auto* dest = storage_.storage_ptr();
for (size_t i = 0; i < n; ++i, ++src) {
dest[i].construct(*src);
}
}
template <typename InputIt>
void init_by_moving(size_t n, InputIt src) {
init_by_copying(n, std::make_move_iterator(src));
}
template <typename InputIt>
void assign_by_copying(size_t n, InputIt src) {
const size_t old_size = storage_.size_;
if (n > old_size) {
storage_.bump_size(n - old_size);
auto* dest = storage_.storage_ptr();
for (size_t i = 0; i < old_size; ++i, ++src) {
dest[i].assign(*src);
}
for (size_t i = old_size; i < n; ++i, ++src) {
dest[i].construct(*src);
}
} else {
auto* dest = storage_.storage_ptr();
for (size_t i = 0; i < n; ++i, ++src) {
dest[i].assign(*src);
}
for (size_t i = n; i < old_size; ++i) {
dest[i].destroy();
}
storage_.reduce_size(old_size - n);
}
}
template <typename InputIt>
void assign_by_moving(size_t n, InputIt src) {
assign_by_copying(n, std::make_move_iterator(src));
}
};
template <typename T, size_t N>
using StaticVector = StaticVectorImpl<T, N, StaticVectorStorage<T, N>>;
template <typename T, size_t N>
using SmallVector = StaticVectorImpl<T, N, SmallVectorStorage<T, N>>;
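// Usage sketch (illustrative): StaticVector never allocates and is capped at N
// elements, while SmallVector spills to heap storage once N is exceeded.
//
//   StaticVector<int, 4> sv;
//   sv.push_back(1);
//   sv.push_back(2);              // size() == 2, capacity() == 4, no allocation
//
//   SmallVector<std::string, 2> names = {"a", "b"};
//   names.push_back("c");         // exceeds N == 2, switches to dynamic storage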
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,78 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <cstdint>
#include <functional>
#include <numeric>
#include <utility>
#include <vector>
namespace arrow {
namespace internal {
template <typename T, typename Cmp = std::less<T>>
std::vector<int64_t> ArgSort(const std::vector<T>& values, Cmp&& cmp = {}) {
std::vector<int64_t> indices(values.size());
std::iota(indices.begin(), indices.end(), 0);
std::sort(indices.begin(), indices.end(),
[&](int64_t i, int64_t j) -> bool { return cmp(values[i], values[j]); });
return indices;
}
template <typename T>
size_t Permute(const std::vector<int64_t>& indices, std::vector<T>* values) {
if (indices.size() <= 1) {
return indices.size();
}
// mask indicating which of values are in the correct location
std::vector<bool> sorted(indices.size(), false);
size_t cycle_count = 0;
for (auto cycle_start = sorted.begin(); cycle_start != sorted.end();
cycle_start = std::find(cycle_start, sorted.end(), false)) {
++cycle_count;
// position in which an element belongs WRT sort
auto sort_into = static_cast<int64_t>(cycle_start - sorted.begin());
if (indices[sort_into] == sort_into) {
// trivial cycle
sorted[sort_into] = true;
continue;
}
// resolve this cycle
const auto end = sort_into;
for (int64_t take_from = indices[sort_into]; take_from != end;
take_from = indices[sort_into]) {
std::swap(values->at(sort_into), values->at(take_from));
sorted[sort_into] = true;
sort_into = take_from;
}
sorted[sort_into] = true;
}
return cycle_count;
}
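// Usage sketch (illustrative): sort one vector and apply the same permutation
// to a parallel vector.
//
//   std::vector<int> keys = {3, 1, 2};
//   std::vector<std::string> payload = {"c", "a", "b"};
//   std::vector<int64_t> order = ArgSort(keys);  // {1, 2, 0}
//   Permute(order, &keys);                       // keys == {1, 2, 3}
//   Permute(order, &payload);                    // payload == {"a", "b", "c"}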
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,98 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cassert>
#include <cstdint>
#include <cstring>
#include "arrow/util/bit_run_reader.h"
namespace arrow {
namespace util {
namespace internal {
/// \brief Compress the buffer to spaced, excluding the null entries.
///
/// \param[in] src the source buffer
/// \param[in] num_values the size of source buffer
/// \param[in] valid_bits bitmap data indicating position of valid slots
/// \param[in] valid_bits_offset offset into valid_bits
/// \param[out] output the output buffer spaced
/// \return The size of spaced buffer.
template <typename T>
inline int SpacedCompress(const T* src, int num_values, const uint8_t* valid_bits,
int64_t valid_bits_offset, T* output) {
int num_valid_values = 0;
arrow::internal::SetBitRunReader reader(valid_bits, valid_bits_offset, num_values);
while (true) {
const auto run = reader.NextRun();
if (run.length == 0) {
break;
}
std::memcpy(output + num_valid_values, src + run.position, run.length * sizeof(T));
num_valid_values += static_cast<int32_t>(run.length);
}
return num_valid_values;
}
/// \brief Relocate values in buffer into positions of non-null values as indicated by
/// a validity bitmap.
///
/// \param[in, out] buffer the in-place buffer
/// \param[in] num_values total size of buffer including null slots
/// \param[in] null_count number of null slots
/// \param[in] valid_bits bitmap data indicating position of valid slots
/// \param[in] valid_bits_offset offset into valid_bits
/// \return The number of values expanded, including nulls.
template <typename T>
inline int SpacedExpand(T* buffer, int num_values, int null_count,
const uint8_t* valid_bits, int64_t valid_bits_offset) {
// Point to end as we add the spacing from the back.
int idx_decode = num_values - null_count;
// Depending on the number of nulls, some of the value slots in buffer may
// be uninitialized, and this will cause valgrind warnings / potentially UB
std::memset(static_cast<void*>(buffer + idx_decode), 0, null_count * sizeof(T));
if (idx_decode == 0) {
// All nulls, nothing more to do
return num_values;
}
arrow::internal::ReverseSetBitRunReader reader(valid_bits, valid_bits_offset,
num_values);
while (true) {
const auto run = reader.NextRun();
if (run.length == 0) {
break;
}
idx_decode -= static_cast<int32_t>(run.length);
assert(idx_decode >= 0);
std::memmove(buffer + run.position, buffer + idx_decode, run.length * sizeof(T));
}
// Otherwise caller gave an incorrect null_count
assert(idx_decode == 0);
return num_values;
}
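// Usage sketch (illustrative): round-trip four slots where slots 1 and 3 are
// null (validity bitmap 0b0101, least-significant bit first).
//
//   int32_t values[4] = {10, 0, 30, 0};
//   const uint8_t valid_bits[1] = {0x05};
//   int32_t packed[4];
//   int n = SpacedCompress<int32_t>(values, 4, valid_bits, 0, packed);
//   // n == 2, packed[0] == 10, packed[1] == 30
//   int m = SpacedExpand<int32_t>(packed, 4, /*null_count=*/2, valid_bits, 0);
//   // m == 4; packed[0] == 10 and packed[2] == 30, while the null slots (1, 3)
//   // are left with unspecified contents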
} // namespace internal
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,48 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cassert>
#include <chrono>
namespace arrow {
namespace internal {
class StopWatch {
// A monotonic clock that measures elapsed wall-clock time (not CPU time)
using ClockType = std::chrono::steady_clock;
public:
StopWatch() {}
void Start() { start_ = ClockType::now(); }
// Returns time in nanoseconds.
uint64_t Stop() {
auto stop = ClockType::now();
std::chrono::nanoseconds d = stop - start_;
assert(d.count() >= 0);
return static_cast<uint64_t>(d.count());
}
private:
std::chrono::time_point<ClockType> start_;
};
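// Usage sketch (illustrative; DoExpensiveWork is a hypothetical workload):
//
//   StopWatch sw;
//   sw.Start();
//   DoExpensiveWork();
//   uint64_t elapsed_ns = sw.Stop();   // nanoseconds elapsed since Start()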
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,171 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cassert>
#include <optional>
#include <string>
#include <string_view>
#include <type_traits>
#include <utility>
#include <vector>
#if __has_include(<charconv>)
#include <charconv>
#endif
#include "arrow/result.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Status;
ARROW_EXPORT std::string HexEncode(const uint8_t* data, size_t length);
ARROW_EXPORT std::string Escape(const char* data, size_t length);
ARROW_EXPORT std::string HexEncode(const char* data, size_t length);
ARROW_EXPORT std::string HexEncode(std::string_view str);
ARROW_EXPORT std::string Escape(std::string_view str);
ARROW_EXPORT Status ParseHexValue(const char* data, uint8_t* out);
namespace internal {
/// Like std::string_view::starts_with in C++20
inline bool StartsWith(std::string_view s, std::string_view prefix) {
return s.length() >= prefix.length() &&
(s.empty() || s.substr(0, prefix.length()) == prefix);
}
/// Like std::string_view::ends_with in C++20
inline bool EndsWith(std::string_view s, std::string_view suffix) {
return s.length() >= suffix.length() &&
(s.empty() || s.substr(s.length() - suffix.length()) == suffix);
}
/// \brief Split a string with a delimiter
ARROW_EXPORT
std::vector<std::string_view> SplitString(std::string_view v, char delim,
int64_t limit = 0);
/// \brief Join strings with a delimiter
ARROW_EXPORT
std::string JoinStrings(const std::vector<std::string_view>& strings,
std::string_view delimiter);
/// \brief Join strings with a delimiter
ARROW_EXPORT
std::string JoinStrings(const std::vector<std::string>& strings,
std::string_view delimiter);
/// \brief Trim whitespace from left and right sides of string
ARROW_EXPORT
std::string TrimString(std::string value);
ARROW_EXPORT
bool AsciiEqualsCaseInsensitive(std::string_view left, std::string_view right);
ARROW_EXPORT
std::string AsciiToLower(std::string_view value);
ARROW_EXPORT
std::string AsciiToUpper(std::string_view value);
/// \brief Search for the first instance of a token and replace it or return nullopt if
/// the token is not found.
ARROW_EXPORT
std::optional<std::string> Replace(std::string_view s, std::string_view token,
std::string_view replacement);
/// \brief Get boolean value from string
///
/// If "1", "true" (case-insensitive), returns true
/// If "0", "false" (case-insensitive), returns false
/// Otherwise, returns Status::Invalid
ARROW_EXPORT
arrow::Result<bool> ParseBoolean(std::string_view value);
#if __has_include(<charconv>)
namespace detail {
template <typename T, typename = void>
struct can_to_chars : public std::false_type {};
template <typename T>
struct can_to_chars<
T, std::void_t<decltype(std::to_chars(std::declval<char*>(), std::declval<char*>(),
std::declval<std::remove_reference_t<T>>()))>>
: public std::true_type {};
} // namespace detail
/// \brief Whether std::to_chars exists for the current value type.
///
/// This is useful as some C++ libraries do not implement all specified overloads
/// for std::to_chars.
template <typename T>
inline constexpr bool have_to_chars = detail::can_to_chars<T>::value;
/// \brief An ergonomic wrapper around std::to_chars, returning a std::string
///
/// For most inputs, the std::string result will not incur any heap allocation
/// thanks to small string optimization.
///
/// Compared to std::to_string, this function gives locale-agnostic results
/// and might also be faster.
template <typename T, typename... Args>
std::string ToChars(T value, Args&&... args) {
if constexpr (!have_to_chars<T>) {
// Some C++ standard libraries do not yet implement std::to_chars for all types,
// in which case we have to fallback to std::string.
return std::to_string(value);
} else {
// According to various sources, the GNU libstdc++ and Microsoft's C++ STL
// allow up to 15 bytes of small string optimization, while clang's libc++
// goes up to 22 bytes. Choose the pessimistic value.
std::string out(15, 0);
auto res = std::to_chars(&out.front(), &out.back(), value, args...);
while (res.ec != std::errc{}) {
assert(res.ec == std::errc::value_too_large);
out.resize(out.capacity() * 2);
res = std::to_chars(&out.front(), &out.back(), value, args...);
}
const auto length = res.ptr - out.data();
assert(length <= static_cast<int64_t>(out.length()));
out.resize(length);
return out;
}
}
#else // !__has_include(<charconv>)
template <typename T>
inline constexpr bool have_to_chars = false;
template <typename T, typename... Args>
std::string ToChars(T value, Args&&... args) {
return std::to_string(value);
}
#endif
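// Usage sketch (illustrative): ToChars is locale-independent, unlike
// std::to_string, so the results below do not depend on the global locale.
//
//   std::string a = ToChars(42);     // "42"
//   std::string b = ToChars(0.25);   // "0.25"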
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,84 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <ostream>
#include <string>
#include <utility>
#include "arrow/util/visibility.h"
namespace arrow {
namespace util {
namespace detail {
class ARROW_EXPORT StringStreamWrapper {
public:
StringStreamWrapper();
~StringStreamWrapper();
std::ostream& stream() { return ostream_; }
std::string str();
protected:
std::unique_ptr<std::ostringstream> sstream_;
std::ostream& ostream_;
};
} // namespace detail
template <typename Head>
void StringBuilderRecursive(std::ostream& stream, Head&& head) {
stream << head;
}
template <typename Head, typename... Tail>
void StringBuilderRecursive(std::ostream& stream, Head&& head, Tail&&... tail) {
StringBuilderRecursive(stream, std::forward<Head>(head));
StringBuilderRecursive(stream, std::forward<Tail>(tail)...);
}
template <typename... Args>
std::string StringBuilder(Args&&... args) {
detail::StringStreamWrapper ss;
StringBuilderRecursive(ss.stream(), std::forward<Args>(args)...);
return ss.str();
}
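// Usage sketch (illustrative): any mix of ostream-streamable values can be
// concatenated in one call.
//
//   std::string msg = StringBuilder("expected ", 3, " rows, got ", 5);
//   // msg == "expected 3 rows, got 5"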
/// CRTP helper for declaring string representation. Defines operator<<
template <typename T>
class ToStringOstreamable {
public:
~ToStringOstreamable() {
static_assert(
std::is_same<decltype(std::declval<const T>().ToString()), std::string>::value,
"ToStringOstreamable depends on the method T::ToString() const");
}
private:
const T& cast() const { return static_cast<const T&>(*this); }
friend inline std::ostream& operator<<(std::ostream& os, const ToStringOstreamable& t) {
return os << t.cast().ToString();
}
};
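// Usage sketch (illustrative, with a hypothetical Point type):
//
//   class Point : public ToStringOstreamable<Point> {
//    public:
//     Point(int x, int y) : x_(x), y_(y) {}
//     std::string ToString() const { return StringBuilder("(", x_, ", ", y_, ")"); }
//    private:
//     int x_, y_;
//   };
//
//   std::ostringstream os;
//   os << Point(1, 2);   // prints "(1, 2)" via the operator<< provided by the CRTP base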
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,106 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include <utility>
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/cancel.h"
#include "arrow/util/functional.h"
#include "arrow/util/macros.h"
#include "arrow/util/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
/// \brief A group of related tasks
///
/// A TaskGroup executes tasks with the signature `Status()`.
/// Execution can be serial or parallel, depending on the TaskGroup
/// implementation. When Finish() returns, it is guaranteed that all
/// tasks have finished, or at least one has errored.
///
/// Once an error has occurred any tasks that are submitted to the task group
/// will not run. The call to Append will simply return without scheduling the
/// task.
///
/// If the task group is parallel it is possible that multiple tasks could be
/// running at the same time and one of those tasks fails. This will put the
/// task group in a failure state (so additional tasks cannot be run) however
/// it will not interrupt running tasks. Finish will not complete
/// until all running tasks have finished, even if one task fails.
///
/// Once a task group has finished, new tasks may not be added to it. If you need to start
/// a new batch of work then you should create a new task group.
class ARROW_EXPORT TaskGroup : public std::enable_shared_from_this<TaskGroup> {
public:
/// Add a Status-returning function to execute. Execution order is
/// undefined. The function may be executed immediately or later.
template <typename Function>
void Append(Function&& func) {
return AppendReal(std::forward<Function>(func));
}
/// Wait for execution of all tasks (and subgroups) to be finished,
/// or for at least one task (or subgroup) to error out.
/// The returned Status propagates the error status of the first failing
/// task (or subgroup).
virtual Status Finish() = 0;
/// Returns a future that will complete the first time all tasks are finished.
/// This should be called only after all top level tasks
/// have been added to the task group.
///
/// If you are using a TaskGroup asynchronously there are a few considerations to keep
/// in mind. The tasks should not block on I/O, etc. (that defeats the purpose of using
/// futures) and should not be doing any nested locking or you run the risk of the tasks
/// getting stuck in the thread pool waiting for tasks which cannot get scheduled.
///
/// Primarily this call is intended to help migrate existing work written with TaskGroup
/// in mind to using futures without having to do a complete conversion on the first
/// pass.
virtual Future<> FinishAsync() = 0;
/// The current aggregate error Status. Non-blocking, useful for stopping early.
virtual Status current_status() = 0;
/// Whether some tasks have already failed. Non-blocking, useful for stopping early.
virtual bool ok() const = 0;
/// How many tasks can typically be executed in parallel.
/// This is only a hint, useful for testing or debugging.
virtual int parallelism() = 0;
static std::shared_ptr<TaskGroup> MakeSerial(StopToken = StopToken::Unstoppable());
static std::shared_ptr<TaskGroup> MakeThreaded(internal::Executor*,
StopToken = StopToken::Unstoppable());
virtual ~TaskGroup() = default;
protected:
TaskGroup() = default;
ARROW_DISALLOW_COPY_AND_ASSIGN(TaskGroup);
virtual void AppendReal(FnOnce<Status()> task) = 0;
};
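// Usage sketch (illustrative; ProcessChunk is a hypothetical Status-returning
// function and GetCpuThreadPool() is assumed to come from thread_pool.h):
//
//   auto group = TaskGroup::MakeThreaded(::arrow::internal::GetCpuThreadPool());
//   for (int i = 0; i < 10; ++i) {
//     group->Append([i] { return ProcessChunk(i); });
//   }
//   ARROW_RETURN_NOT_OK(group->Finish());   // propagates the first task error, if any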
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,104 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// approximate quantiles from arbitrary length dataset with O(1) space
// based on 'Computing Extremely Accurate Quantiles Using t-Digests' from Dunning & Ertl
// - https://arxiv.org/abs/1902.04023
// - https://github.com/tdunning/t-digest
#pragma once
#include <cmath>
#include <memory>
#include <vector>
#include "arrow/util/logging.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
class Status;
namespace internal {
class ARROW_EXPORT TDigest {
public:
explicit TDigest(uint32_t delta = 100, uint32_t buffer_size = 500);
~TDigest();
TDigest(TDigest&&);
TDigest& operator=(TDigest&&);
// reset and re-use this tdigest
void Reset();
// validate data integrity
Status Validate() const;
// dump internal data, only for debug
void Dump() const;
// buffer a single data point, consume internal buffer if full
// this function is intensively called and performance critical
// call it only if you are sure no NAN exists in input data
void Add(double value) {
DCHECK(!std::isnan(value)) << "cannot add NAN";
if (ARROW_PREDICT_FALSE(input_.size() == input_.capacity())) {
MergeInput();
}
input_.push_back(value);
}
// skip NAN on adding
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value>::type NanAdd(T value) {
if (!std::isnan(value)) Add(value);
}
template <typename T>
typename std::enable_if<std::is_integral<T>::value>::type NanAdd(T value) {
Add(static_cast<double>(value));
}
// merge with other t-digests, called infrequently
void Merge(const std::vector<TDigest>& others);
void Merge(const TDigest& other);
// calculate quantile
double Quantile(double q) const;
double Min() const { return Quantile(0); }
double Max() const { return Quantile(1); }
double Mean() const;
// check if this tdigest contains no valid data points
bool is_empty() const;
private:
// merge input data with current tdigest
void MergeInput() const;
// input buffer, size = buffer_size * sizeof(double)
mutable std::vector<double> input_;
// hide other members with pimpl
class TDigestImpl;
std::unique_ptr<TDigestImpl> impl_;
};
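// Usage sketch (illustrative; `samples` is a hypothetical range of doubles):
//
//   TDigest digest;
//   for (double v : samples) digest.NanAdd(v);   // NaNs are silently skipped
//   double median = digest.Quantile(0.5);
//   double p99 = digest.Quantile(0.99);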
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,90 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <iosfwd>
#include "arrow/testing/gtest_util.h"
#include "arrow/util/iterator.h"
namespace arrow {
struct TestInt {
TestInt();
TestInt(int i); // NOLINT runtime/explicit
int value;
bool operator==(const TestInt& other) const;
friend std::ostream& operator<<(std::ostream& os, const TestInt& v);
};
template <>
struct IterationTraits<TestInt> {
static TestInt End() { return TestInt(); }
static bool IsEnd(const TestInt& val) { return val == IterationTraits<TestInt>::End(); }
};
struct TestStr {
TestStr();
TestStr(const std::string& s); // NOLINT runtime/explicit
TestStr(const char* s); // NOLINT runtime/explicit
explicit TestStr(const TestInt& test_int);
std::string value;
bool operator==(const TestStr& other) const;
friend std::ostream& operator<<(std::ostream& os, const TestStr& v);
};
template <>
struct IterationTraits<TestStr> {
static TestStr End() { return TestStr(); }
static bool IsEnd(const TestStr& val) { return val == IterationTraits<TestStr>::End(); }
};
std::vector<TestInt> RangeVector(unsigned int max, unsigned int step = 1);
template <typename T>
inline Iterator<T> VectorIt(std::vector<T> v) {
return MakeVectorIterator<T>(std::move(v));
}
template <typename T>
inline Iterator<T> PossiblySlowVectorIt(std::vector<T> v, bool slow = false) {
auto iterator = MakeVectorIterator<T>(std::move(v));
if (slow) {
return MakeTransformedIterator<T, T>(std::move(iterator),
[](T item) -> Result<TransformFlow<T>> {
SleepABit();
return TransformYield(item);
});
} else {
return iterator;
}
}
template <typename T>
inline void AssertIteratorExhausted(Iterator<T>& it) {
ASSERT_OK_AND_ASSIGN(T next, it.Next());
ASSERT_TRUE(IsIterationEnd(next));
}
Transformer<TestInt, TestStr> MakeFilter(std::function<bool(TestInt&)> filter);
} // namespace arrow

View File

@@ -0,0 +1,527 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <queue>
#include <type_traits>
#include <utility>
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/util/cancel.h"
#include "arrow/util/functional.h"
#include "arrow/util/future.h"
#include "arrow/util/iterator.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
#if defined(_MSC_VER)
// Disable harmless warning for decorated name length limit
#pragma warning(disable : 4503)
#endif
namespace arrow {
/// \brief Get the capacity of the global thread pool
///
/// Return the number of worker threads in the thread pool to which
/// Arrow dispatches various CPU-bound tasks. This is an ideal number,
/// not necessarily the exact number of threads at a given point in time.
///
/// You can change this number using SetCpuThreadPoolCapacity().
ARROW_EXPORT int GetCpuThreadPoolCapacity();
/// \brief Set the capacity of the global thread pool
///
/// Set the number of worker threads in the thread pool to which
/// Arrow dispatches various CPU-bound tasks.
///
/// The current number is returned by GetCpuThreadPoolCapacity().
ARROW_EXPORT Status SetCpuThreadPoolCapacity(int threads);
namespace internal {
// Hints about a task that may be used by an Executor.
// They are ignored by the provided ThreadPool implementation.
struct TaskHints {
// The lower, the more urgent
int32_t priority = 0;
// The IO transfer size in bytes
int64_t io_size = -1;
// The approximate CPU cost in number of instructions
int64_t cpu_cost = -1;
// An application-specific ID
int64_t external_id = -1;
};
class ARROW_EXPORT Executor {
public:
using StopCallback = internal::FnOnce<void(const Status&)>;
virtual ~Executor();
// Spawn a fire-and-forget task.
template <typename Function>
Status Spawn(Function&& func) {
return SpawnReal(TaskHints{}, std::forward<Function>(func), StopToken::Unstoppable(),
StopCallback{});
}
template <typename Function>
Status Spawn(Function&& func, StopToken stop_token) {
return SpawnReal(TaskHints{}, std::forward<Function>(func), std::move(stop_token),
StopCallback{});
}
template <typename Function>
Status Spawn(TaskHints hints, Function&& func) {
return SpawnReal(hints, std::forward<Function>(func), StopToken::Unstoppable(),
StopCallback{});
}
template <typename Function>
Status Spawn(TaskHints hints, Function&& func, StopToken stop_token) {
return SpawnReal(hints, std::forward<Function>(func), std::move(stop_token),
StopCallback{});
}
template <typename Function>
Status Spawn(TaskHints hints, Function&& func, StopToken stop_token,
StopCallback stop_callback) {
return SpawnReal(hints, std::forward<Function>(func), std::move(stop_token),
std::move(stop_callback));
}
// Transfers a future to this executor. Any continuations added to the
// returned future will run in this executor. Otherwise they would run
// on the same thread that called MarkFinished.
//
// This is necessary when (for example) an I/O task is completing a future.
// The continuations of that future should run on the CPU thread pool keeping
// CPU heavy work off the I/O thread pool. So the I/O task should transfer
// the future to the CPU executor before returning.
//
// By default this method will only transfer if the future is not already completed. If
// the future is already completed then any callback would be run synchronously and so
// no transfer is typically necessary. However, in cases where you want to force a
// transfer (e.g. to help the scheduler break up units of work across multiple cores)
// then you can override this behavior with `always_transfer`.
template <typename T>
Future<T> Transfer(Future<T> future) {
return DoTransfer(std::move(future), false);
}
// Overload of Transfer which will always schedule callbacks on new threads even if the
// future is finished when the callback is added.
//
// This can be useful in cases where you want to ensure parallelism
template <typename T>
Future<T> TransferAlways(Future<T> future) {
return DoTransfer(std::move(future), true);
}
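// Usage sketch (illustrative; `io_future` and `cpu_executor` are assumed to
// exist in the caller's context):
//
//   Future<int64_t> on_cpu = cpu_executor->Transfer(io_future);
//   on_cpu.AddCallback([](const Result<int64_t>& nbytes) {
//     // continuation now runs on the CPU executor, not the I/O thread
//   });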
// Submit a callable and arguments for execution. Return a future that
// will return the callable's result value once it completes.
// The callable's arguments are copied before execution.
template <typename Function, typename... Args,
typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
Function && (Args && ...)>>
Result<FutureType> Submit(TaskHints hints, StopToken stop_token, Function&& func,
Args&&... args) {
using ValueType = typename FutureType::ValueType;
auto future = FutureType::Make();
auto task = std::bind(::arrow::detail::ContinueFuture{}, future,
std::forward<Function>(func), std::forward<Args>(args)...);
struct {
WeakFuture<ValueType> weak_fut;
void operator()(const Status& st) {
auto fut = weak_fut.get();
if (fut.is_valid()) {
fut.MarkFinished(st);
}
}
} stop_callback{WeakFuture<ValueType>(future)};
ARROW_RETURN_NOT_OK(SpawnReal(hints, std::move(task), std::move(stop_token),
std::move(stop_callback)));
return future;
}
template <typename Function, typename... Args,
typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
Function && (Args && ...)>>
Result<FutureType> Submit(StopToken stop_token, Function&& func, Args&&... args) {
return Submit(TaskHints{}, stop_token, std::forward<Function>(func),
std::forward<Args>(args)...);
}
template <typename Function, typename... Args,
typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
Function && (Args && ...)>>
Result<FutureType> Submit(TaskHints hints, Function&& func, Args&&... args) {
return Submit(std::move(hints), StopToken::Unstoppable(),
std::forward<Function>(func), std::forward<Args>(args)...);
}
template <typename Function, typename... Args,
typename FutureType = typename ::arrow::detail::ContinueFuture::ForSignature<
Function && (Args && ...)>>
Result<FutureType> Submit(Function&& func, Args&&... args) {
return Submit(TaskHints{}, StopToken::Unstoppable(), std::forward<Function>(func),
std::forward<Args>(args)...);
}
// Return the level of parallelism (the number of tasks that may be executed
// concurrently). This may be an approximate number.
virtual int GetCapacity() = 0;
// Return true if the thread from which this function is called is owned by this
// Executor. Returns false if this Executor does not support this property.
virtual bool OwnsThisThread() { return false; }
/// \brief An interface to represent something with a custom destructor
///
/// \see KeepAlive
class ARROW_EXPORT Resource {
public:
virtual ~Resource() = default;
};
/// \brief Keep a resource alive until all executor threads have terminated
///
/// Executors may have static storage duration. In particular, the CPU and I/O
/// executors are currently implemented this way. These threads may access other
/// objects with static storage duration such as the OpenTelemetry runtime context,
/// the default memory pool, or other static executors.
///
/// The order in which these objects are destroyed is difficult to control. In order
/// to ensure those objects remain alive until all threads have finished those objects
/// should be wrapped in a Resource object and passed into this method. The given
/// shared_ptr will be kept alive until all threads have finished their worker loops.
virtual void KeepAlive(std::shared_ptr<Resource> resource);
protected:
ARROW_DISALLOW_COPY_AND_ASSIGN(Executor);
Executor() = default;
template <typename T, typename FT = Future<T>, typename FTSync = typename FT::SyncType>
Future<T> DoTransfer(Future<T> future, bool always_transfer = false) {
auto transferred = Future<T>::Make();
if (always_transfer) {
CallbackOptions callback_options = CallbackOptions::Defaults();
callback_options.should_schedule = ShouldSchedule::Always;
callback_options.executor = this;
auto sync_callback = [transferred](const FTSync& result) mutable {
transferred.MarkFinished(result);
};
future.AddCallback(sync_callback, callback_options);
return transferred;
}
// We could use AddCallback's ShouldSchedule::IfUnfinished but we can save a bit of
// work by doing the test here.
auto callback = [this, transferred](const FTSync& result) mutable {
auto spawn_status =
Spawn([transferred, result]() mutable { transferred.MarkFinished(result); });
if (!spawn_status.ok()) {
transferred.MarkFinished(spawn_status);
}
};
auto callback_factory = [&callback]() { return callback; };
if (future.TryAddCallback(callback_factory)) {
return transferred;
}
// If the future is already finished and we aren't going to force spawn a thread
// then we don't need to add another layer of callback and can return the original
// future
return future;
}
// Subclassing API
virtual Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
StopCallback&&) = 0;
};
/// \brief An executor implementation that runs all tasks on a single thread using an
/// event loop.
///
/// Note: Any sort of nested parallelism will deadlock this executor. Blocking waits are
/// fine but if one task needs to wait for another task it must be expressed as an
/// asynchronous continuation.
class ARROW_EXPORT SerialExecutor : public Executor {
public:
template <typename T = ::arrow::internal::Empty>
using TopLevelTask = internal::FnOnce<Future<T>(Executor*)>;
~SerialExecutor() override;
  int GetCapacity() override { return 1; }
bool OwnsThisThread() override;
Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
StopCallback&&) override;
/// \brief Runs the TopLevelTask and any scheduled tasks
///
/// The TopLevelTask (or one of the tasks it schedules) must either return an invalid
/// status or call the finish signal. Failure to do this will result in a deadlock. For
/// this reason it is preferable (if possible) to use the helper methods (below)
  /// RunSynchronously/RunSerially, which delegate the responsibility onto a Future
/// producer's existing responsibility to always mark a future finished (which can
/// someday be aided by ARROW-12207).
template <typename T = internal::Empty, typename FT = Future<T>,
typename FTSync = typename FT::SyncType>
static FTSync RunInSerialExecutor(TopLevelTask<T> initial_task) {
Future<T> fut = SerialExecutor().Run<T>(std::move(initial_task));
return FutureToSync(fut);
}
/// \brief Transform an AsyncGenerator into an Iterator
///
/// An event loop will be created and each call to Next will power the event loop with
/// the calling thread until the next item is ready to be delivered.
///
  /// Note: The iterator's destructor will run the event loop until the given generator is
  /// fully exhausted. If you wish to abandon iteration before completion then the correct
/// approach is to use a stop token to cause the generator to exhaust early.
template <typename T>
static Iterator<T> IterateGenerator(
internal::FnOnce<Result<std::function<Future<T>()>>(Executor*)> initial_task) {
auto serial_executor = std::unique_ptr<SerialExecutor>(new SerialExecutor());
auto maybe_generator = std::move(initial_task)(serial_executor.get());
if (!maybe_generator.ok()) {
return MakeErrorIterator<T>(maybe_generator.status());
}
auto generator = maybe_generator.MoveValueUnsafe();
struct SerialIterator {
SerialIterator(std::unique_ptr<SerialExecutor> executor,
std::function<Future<T>()> generator)
: executor(std::move(executor)), generator(std::move(generator)) {}
ARROW_DISALLOW_COPY_AND_ASSIGN(SerialIterator);
ARROW_DEFAULT_MOVE_AND_ASSIGN(SerialIterator);
~SerialIterator() {
        // A serial iterator must be fully consumed before it can be destroyed; destroying
        // it any earlier would lead to resource leakage. There will likely be deadlocks at
// this spot in the future but these will be the result of other bugs and not the
// fact that we are forcing consumption here.
// If a streaming API needs to support early abandonment then it should be done so
// with a cancellation token and not simply discarding the iterator and expecting
// the underlying work to clean up correctly.
if (executor && !executor->IsFinished()) {
while (true) {
Result<T> maybe_next = Next();
if (!maybe_next.ok() || IsIterationEnd(*maybe_next)) {
break;
}
}
}
}
Result<T> Next() {
executor->Unpause();
// This call may lead to tasks being scheduled in the serial executor
Future<T> next_fut = generator();
next_fut.AddCallback([this](const Result<T>& res) {
// If we're done iterating we should drain the rest of the tasks in the executor
if (!res.ok() || IsIterationEnd(*res)) {
executor->Finish();
return;
}
// Otherwise we will break out immediately, leaving the remaining tasks for
// the next call.
executor->Pause();
});
// Borrow this thread and run tasks until the future is finished
executor->RunLoop();
if (!next_fut.is_finished()) {
// Not clear this is possible since RunLoop wouldn't generally exit
// unless we paused/finished which would imply next_fut has been
// finished.
return Status::Invalid(
"Serial executor terminated before next result computed");
}
// At this point we may still have tasks in the executor, that is ok.
// We will run those tasks the next time through.
return next_fut.result();
}
std::unique_ptr<SerialExecutor> executor;
std::function<Future<T>()> generator;
};
return Iterator<T>(SerialIterator{std::move(serial_executor), std::move(generator)});
}
private:
SerialExecutor();
// State uses mutex
struct State;
std::shared_ptr<State> state_;
void RunLoop();
// We mark the serial executor "finished" when there should be
// no more tasks scheduled on it. It's not strictly needed but
// can help catch bugs where we are trying to use the executor
// after we are done with it.
void Finish();
bool IsFinished();
// We pause the executor when we are running an async generator
// and we have received an item that we can deliver.
void Pause();
void Unpause();
template <typename T, typename FTSync = typename Future<T>::SyncType>
Future<T> Run(TopLevelTask<T> initial_task) {
auto final_fut = std::move(initial_task)(this);
final_fut.AddCallback([this](const FTSync&) { Finish(); });
RunLoop();
return final_fut;
}
};
/// An Executor implementation spawning tasks in FIFO manner on a fixed-size
/// pool of worker threads.
///
/// Note: Any sort of nested parallelism will deadlock this executor. Blocking waits are
/// fine but if one task needs to wait for another task it must be expressed as an
/// asynchronous continuation.
class ARROW_EXPORT ThreadPool : public Executor {
public:
// Construct a thread pool with the given number of worker threads
static Result<std::shared_ptr<ThreadPool>> Make(int threads);
// Like Make(), but takes care that the returned ThreadPool is compatible
// with destruction late at process exit.
static Result<std::shared_ptr<ThreadPool>> MakeEternal(int threads);
// Destroy thread pool; the pool will first be shut down
~ThreadPool() override;
// Return the desired number of worker threads.
// The actual number of workers may lag a bit before being adjusted to
// match this value.
int GetCapacity() override;
bool OwnsThisThread() override;
// Return the number of tasks either running or in the queue.
int GetNumTasks();
// Dynamically change the number of worker threads.
//
// This function always returns immediately.
// If fewer threads are running than this number, new threads are spawned
// on-demand when needed for task execution.
// If more threads are running than this number, excess threads are reaped
// as soon as possible.
Status SetCapacity(int threads);
// Heuristic for the default capacity of a thread pool for CPU-bound tasks.
// This is exposed as a static method to help with testing.
static int DefaultCapacity();
// Shutdown the pool. Once the pool starts shutting down, new tasks
// cannot be submitted anymore.
// If "wait" is true, shutdown waits for all pending tasks to be finished.
// If "wait" is false, workers are stopped as soon as currently executing
// tasks are finished.
Status Shutdown(bool wait = true);
// Wait for the thread pool to become idle
//
// This is useful for sequencing tests
void WaitForIdle();
void KeepAlive(std::shared_ptr<Executor::Resource> resource) override;
struct State;
protected:
FRIEND_TEST(TestThreadPool, SetCapacity);
FRIEND_TEST(TestGlobalThreadPool, Capacity);
ARROW_FRIEND_EXPORT friend ThreadPool* GetCpuThreadPool();
ThreadPool();
Status SpawnReal(TaskHints hints, FnOnce<void()> task, StopToken,
StopCallback&&) override;
// Collect finished worker threads, making sure the OS threads have exited
void CollectFinishedWorkersUnlocked();
// Launch a given number of additional workers
void LaunchWorkersUnlocked(int threads);
// Get the current actual capacity
int GetActualCapacity();
static std::shared_ptr<ThreadPool> MakeCpuThreadPool();
std::shared_ptr<State> sp_state_;
State* state_;
bool shutdown_on_destroy_;
};
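// A minimal usage sketch (error handling elided; this must appear inside a function that
// returns Status or Result for the macros to work, and the lambda body is illustrative):
//
//   ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ThreadPool> pool, ThreadPool::Make(4));
//   ARROW_ASSIGN_OR_RAISE(Future<int> fut, pool->Submit([] { return 42; }));
//   ARROW_RETURN_NOT_OK(pool->Shutdown(/*wait=*/true));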
// Return the process-global thread pool for CPU-bound tasks.
ARROW_EXPORT ThreadPool* GetCpuThreadPool();
/// \brief Potentially run an async operation serially (if use_threads is false)
/// \see RunSerially
///
/// If `use_threads` is true, the global CPU executor is used.
/// If `use_threads` is false, a temporary SerialExecutor is used.
/// `get_future` is called (from this thread) with the chosen executor and must
/// return a future that will eventually finish. This function returns once the
/// future has finished.
template <typename Fut, typename ValueType = typename Fut::ValueType>
typename Fut::SyncType RunSynchronously(FnOnce<Fut(Executor*)> get_future,
bool use_threads) {
if (use_threads) {
auto fut = std::move(get_future)(GetCpuThreadPool());
return FutureToSync(fut);
} else {
return SerialExecutor::RunInSerialExecutor<ValueType>(std::move(get_future));
}
}
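// A minimal usage sketch for RunSynchronously (CountRowsAsync is a hypothetical
// Future<int64_t>-producing helper, not declared in this header):
//
//   Result<int64_t> n_rows = RunSynchronously<Future<int64_t>>(
//       [](Executor* executor) { return CountRowsAsync(executor); },
//       /*use_threads=*/false);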
/// \brief Potentially iterate an async generator serially (if use_threads is false)
/// \see IterateGenerator
///
/// If `use_threads` is true, the global CPU executor will be used. Each call to
/// the iterator will simply wait until the next item is available. Tasks may run in
/// the background between calls.
///
/// If `use_threads` is false, the calling thread only will be used. Each call to
/// the iterator will use the calling thread to do enough work to generate one item.
/// Tasks will be left in a queue until the next call and no work will be done between
/// calls.
template <typename T>
Iterator<T> IterateSynchronously(
FnOnce<Result<std::function<Future<T>()>>(Executor*)> get_gen, bool use_threads) {
if (use_threads) {
auto maybe_gen = std::move(get_gen)(GetCpuThreadPool());
if (!maybe_gen.ok()) {
return MakeErrorIterator<T>(maybe_gen.status());
}
return MakeGeneratorIterator(*maybe_gen);
} else {
return SerialExecutor::IterateGenerator(std::move(get_gen));
}
}
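// A minimal usage sketch for IterateSynchronously (MakeAsyncRowGenerator is a hypothetical
// factory returning std::function<Future<int>()>, not declared in this header):
//
//   Iterator<int> rows = IterateSynchronously<int>(
//       [](Executor* executor) { return MakeAsyncRowGenerator(executor); },
//       /*use_threads=*/false);
//   ARROW_ASSIGN_OR_RAISE(int first_row, rows.Next());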
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,83 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <chrono>
#include <memory>
#include <utility>
#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace util {
enum DivideOrMultiply {
MULTIPLY,
DIVIDE,
};
ARROW_EXPORT
std::pair<DivideOrMultiply, int64_t> GetTimestampConversion(TimeUnit::type in_unit,
TimeUnit::type out_unit);
// Converts a Timestamp value into another Timestamp value.
//
// This function takes care of properly transforming from one unit to another.
//
// \param[in] in the input type. Must be TimestampType.
// \param[in] out the output type. Must be TimestampType.
// \param[in] value the input value.
//
// \return The converted value, or an error.
ARROW_EXPORT Result<int64_t> ConvertTimestampValue(const std::shared_ptr<DataType>& in,
const std::shared_ptr<DataType>& out,
int64_t value);
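// A minimal usage sketch (timestamp() is the usual arrow::timestamp() type factory):
//
//   // 1 second expressed in milliseconds -> 1000
//   Result<int64_t> ms = ConvertTimestampValue(timestamp(TimeUnit::SECOND),
//                                              timestamp(TimeUnit::MILLI), 1);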
template <typename Visitor, typename... Args>
decltype(std::declval<Visitor>()(std::chrono::seconds{}, std::declval<Args&&>()...))
VisitDuration(TimeUnit::type unit, Visitor&& visitor, Args&&... args) {
switch (unit) {
default:
case TimeUnit::SECOND:
break;
case TimeUnit::MILLI:
return visitor(std::chrono::milliseconds{}, std::forward<Args>(args)...);
case TimeUnit::MICRO:
return visitor(std::chrono::microseconds{}, std::forward<Args>(args)...);
case TimeUnit::NANO:
return visitor(std::chrono::nanoseconds{}, std::forward<Args>(args)...);
}
return visitor(std::chrono::seconds{}, std::forward<Args>(args)...);
}
/// Convert a count of seconds to the corresponding count in a different TimeUnit
struct CastSecondsToUnitImpl {
template <typename Duration>
int64_t operator()(Duration, int64_t seconds) {
auto duration = std::chrono::duration_cast<Duration>(std::chrono::seconds{seconds});
return static_cast<int64_t>(duration.count());
}
};
inline int64_t CastSecondsToUnit(TimeUnit::type unit, int64_t seconds) {
return VisitDuration(unit, CastSecondsToUnitImpl{}, seconds);
}
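// A minimal usage sketch:
//
//   int64_t ns = CastSecondsToUnit(TimeUnit::NANO, 2);   // 2000000000
//   int64_t ms = CastSecondsToUnit(TimeUnit::MILLI, 2);  // 2000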
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,41 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <memory>
#include "arrow/util/visibility.h"
namespace arrow {
namespace util {
namespace tracing {
class ARROW_EXPORT SpanDetails {
public:
virtual ~SpanDetails() {}
};
class ARROW_EXPORT Span {
public:
Span() noexcept;
std::unique_ptr<SpanDetails> details;
};
} // namespace tracing
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,243 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iosfwd>
#include <limits>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "arrow/status.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
// A non-zero-terminated small string class.
// std::string usually has a small string optimization
// (see review at https://shaharmike.com/cpp/std-string/)
// but this one allows tight control and optimization of memory layout.
template <uint8_t N>
class SmallString {
public:
SmallString() : length_(0) {}
template <typename T>
SmallString(const T& v) { // NOLINT implicit constructor
*this = std::string_view(v);
}
SmallString& operator=(const std::string_view s) {
#ifndef NDEBUG
CheckSize(s.size());
#endif
length_ = static_cast<uint8_t>(s.size());
std::memcpy(data_, s.data(), length_);
return *this;
}
SmallString& operator=(const std::string& s) {
*this = std::string_view(s);
return *this;
}
SmallString& operator=(const char* s) {
*this = std::string_view(s);
return *this;
}
explicit operator std::string_view() const { return std::string_view(data_, length_); }
const char* data() const { return data_; }
size_t length() const { return length_; }
bool empty() const { return length_ == 0; }
char operator[](size_t pos) const {
#ifndef NDEBUG
assert(pos <= length_);
#endif
return data_[pos];
}
SmallString substr(size_t pos) const {
return SmallString(std::string_view(*this).substr(pos));
}
SmallString substr(size_t pos, size_t count) const {
return SmallString(std::string_view(*this).substr(pos, count));
}
template <typename T>
bool operator==(T&& other) const {
return std::string_view(*this) == std::string_view(std::forward<T>(other));
}
template <typename T>
bool operator!=(T&& other) const {
return std::string_view(*this) != std::string_view(std::forward<T>(other));
}
protected:
uint8_t length_;
char data_[N];
void CheckSize(size_t n) { assert(n <= N); }
};
template <uint8_t N>
std::ostream& operator<<(std::ostream& os, const SmallString<N>& str) {
return os << std::string_view(str);
}
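// A minimal usage sketch (the values in comments are what the expressions evaluate to):
//
//   SmallString<16> name = "uint32";
//   name.length();                 // 6
//   name.substr(0, 4) == "uint";   // true
//   std::string_view view(name);   // explicit conversion; the view is not null-terminated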
// A trie class for byte strings, optimized for small sets of short strings.
// This class is immutable by design; use a TrieBuilder to construct it.
class ARROW_EXPORT Trie {
using index_type = int16_t;
using fast_index_type = int_fast16_t;
static constexpr auto kMaxIndex = std::numeric_limits<index_type>::max();
public:
Trie() : size_(0) {}
Trie(Trie&&) = default;
Trie& operator=(Trie&&) = default;
int32_t Find(std::string_view s) const {
const Node* node = &nodes_[0];
fast_index_type pos = 0;
if (s.length() > static_cast<size_t>(kMaxIndex)) {
return -1;
}
fast_index_type remaining = static_cast<fast_index_type>(s.length());
while (remaining > 0) {
auto substring_length = node->substring_length();
if (substring_length > 0) {
auto substring_data = node->substring_data();
if (remaining < substring_length) {
// Input too short
return -1;
}
for (fast_index_type i = 0; i < substring_length; ++i) {
if (s[pos++] != substring_data[i]) {
// Mismatching substring
return -1;
}
--remaining;
}
if (remaining == 0) {
// Matched node exactly
return node->found_index_;
}
}
// Lookup child using next input character
if (node->child_lookup_ == -1) {
// Input too long
return -1;
}
auto c = static_cast<uint8_t>(s[pos++]);
--remaining;
auto child_index = lookup_table_[node->child_lookup_ * 256 + c];
if (child_index == -1) {
// Child not found
return -1;
}
node = &nodes_[child_index];
}
// Input exhausted
if (node->substring_.empty()) {
// Matched node exactly
return node->found_index_;
} else {
return -1;
}
}
Status Validate() const;
void Dump() const;
protected:
static constexpr size_t kNodeSize = 16;
static constexpr auto kMaxSubstringLength =
kNodeSize - 2 * sizeof(index_type) - sizeof(int8_t);
struct Node {
// If this node is a valid end of string, index of found string, otherwise -1
index_type found_index_;
// Base index for child lookup in lookup_table_ (-1 if no child nodes)
index_type child_lookup_;
// The substring for this node.
SmallString<kMaxSubstringLength> substring_;
fast_index_type substring_length() const {
return static_cast<fast_index_type>(substring_.length());
}
const char* substring_data() const { return substring_.data(); }
};
static_assert(sizeof(Node) == kNodeSize, "Unexpected node size");
ARROW_DISALLOW_COPY_AND_ASSIGN(Trie);
void Dump(const Node* node, const std::string& indent) const;
// Node table: entry 0 is the root node
std::vector<Node> nodes_;
// Indexed lookup structure: gives index in node table, or -1 if not found
std::vector<index_type> lookup_table_;
// Number of entries
index_type size_;
friend class TrieBuilder;
};
class ARROW_EXPORT TrieBuilder {
using index_type = Trie::index_type;
using fast_index_type = Trie::fast_index_type;
public:
TrieBuilder();
Status Append(std::string_view s, bool allow_duplicate = false);
Trie Finish();
protected:
// Extend the lookup table by 256 entries, return the index of the new span
Status ExtendLookupTable(index_type* out_lookup_index);
// Split the node given by the index at the substring index `split_at`
Status SplitNode(fast_index_type node_index, fast_index_type split_at);
// Append an already constructed child node to the parent
Status AppendChildNode(Trie::Node* parent, uint8_t ch, Trie::Node&& node);
// Create a matching child node from this parent
Status CreateChildNode(Trie::Node* parent, uint8_t ch, std::string_view substring);
Status CreateChildNode(Trie::Node* parent, char ch, std::string_view substring);
Trie trie_;
static constexpr auto kMaxIndex = std::numeric_limits<index_type>::max();
};
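// A minimal usage sketch (error handling elided; indices are assumed to follow append
// order, and Find returns -1 when the string is absent):
//
//   TrieBuilder builder;
//   ARROW_RETURN_NOT_OK(builder.Append("null"));
//   ARROW_RETURN_NOT_OK(builder.Append("n/a"));
//   Trie trie = builder.Finish();
//   trie.Find("n/a");   // 1
//   trie.Find("nan");   // -1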
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,64 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
namespace arrow {
namespace internal {
struct Empty;
} // namespace internal
template <typename T = internal::Empty>
class WeakFuture;
class FutureWaiter;
class TimestampParser;
namespace internal {
class Executor;
class TaskGroup;
class ThreadPool;
class CpuInfo;
} // namespace internal
struct Compression {
/// \brief Compression algorithm
enum type {
UNCOMPRESSED,
SNAPPY,
GZIP,
BROTLI,
ZSTD,
LZ4,
LZ4_FRAME,
LZO,
BZ2,
LZ4_HADOOP
};
};
namespace util {
class AsyncTaskScheduler;
class Compressor;
class Decompressor;
class Codec;
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <type_traits>
namespace arrow {
namespace internal {
/// \brief Metafunction to allow checking if a type matches any of another set of types
template <typename...>
struct IsOneOf : std::false_type {}; /// Base case: nothing has matched
template <typename T, typename U, typename... Args>
struct IsOneOf<T, U, Args...> {
/// Recursive case: T == U or T matches any other types provided (not including U).
static constexpr bool value = std::is_same<T, U>::value || IsOneOf<T, Args...>::value;
};
/// \brief Shorthand for using IsOneOf + std::enable_if
template <typename T, typename... Args>
using EnableIfIsOneOf = typename std::enable_if<IsOneOf<T, Args...>::value, T>::type;
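// A minimal compile-time sketch:
//
//   static_assert(IsOneOf<int, float, int, double>::value, "int is in the set");
//   static_assert(!IsOneOf<char, float, double>::value, "char is not in the set");
//   // EnableIfIsOneOf<int, int, long> resolves to int and can gate an overload.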
/// \brief is_null_pointer from C++17
template <typename T>
struct is_null_pointer : std::is_same<std::nullptr_t, typename std::remove_cv<T>::type> {
};
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,88 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Contains utilities for making UBSan happy.
#pragma once
#include <cstring>
#include <memory>
#include <type_traits>
#include "arrow/util/macros.h"
namespace arrow {
namespace util {
namespace internal {
constexpr uint8_t kNonNullFiller = 0;
} // namespace internal
/// \brief Returns maybe_null if not null, or a non-null pointer to arbitrary memory
/// that shouldn't be dereferenced.
///
/// Memset/Memcpy are undefined when a nullptr is passed as an argument; use this utility
/// method to wrap locations where this could happen.
///
/// Note: Flatbuffers has UBSan warnings if a zero length vector is passed.
/// https://github.com/google/flatbuffers/pull/5355 is trying to resolve
/// them.
template <typename T>
inline T* MakeNonNull(T* maybe_null = NULLPTR) {
if (ARROW_PREDICT_TRUE(maybe_null != NULLPTR)) {
return maybe_null;
}
return const_cast<T*>(reinterpret_cast<const T*>(&internal::kNonNullFiller));
}
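// A minimal usage sketch (dest and src are illustrative buffer pointers that may be null
// when length is zero):
//
//   std::memcpy(MakeNonNull(dest), MakeNonNull(src), /*length=*/0);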
template <typename T>
inline typename std::enable_if<std::is_trivial<T>::value, T>::type SafeLoadAs(
const uint8_t* unaligned) {
typename std::remove_const<T>::type ret;
std::memcpy(&ret, unaligned, sizeof(T));
return ret;
}
template <typename T>
inline typename std::enable_if<std::is_trivial<T>::value, T>::type SafeLoad(
const T* unaligned) {
typename std::remove_const<T>::type ret;
std::memcpy(&ret, unaligned, sizeof(T));
return ret;
}
template <typename U, typename T>
inline typename std::enable_if<std::is_trivial<T>::value && std::is_trivial<U>::value &&
sizeof(T) == sizeof(U),
U>::type
SafeCopy(T value) {
typename std::remove_const<U>::type ret;
std::memcpy(&ret, &value, sizeof(T));
return ret;
}
template <typename T>
inline typename std::enable_if<std::is_trivial<T>::value, void>::type SafeStore(
void* unaligned, T value) {
std::memcpy(unaligned, &value, sizeof(T));
}
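// A minimal usage sketch (buf is an illustrative uint8_t* into a possibly unaligned buffer):
//
//   uint32_t v = SafeLoadAs<uint32_t>(buf + 3);   // unaligned read without UB
//   SafeStore(buf + 3, v + 1);                    // unaligned write without UB
//   int32_t bits = SafeCopy<int32_t>(1.5f);       // bit-cast a float to int32_t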
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,30 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "arrow/util/visibility.h"
#include <string_view>
namespace arrow {
[[noreturn]] ARROW_EXPORT void Unreachable(const char* message = "Unreachable");
[[noreturn]] ARROW_EXPORT void Unreachable(std::string_view message);
} // namespace arrow

View File

@@ -0,0 +1,118 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace internal {
/// \brief A parsed URI
class ARROW_EXPORT Uri {
public:
Uri();
~Uri();
Uri(Uri&&);
Uri& operator=(Uri&&);
// XXX Should we use std::string_view instead? These functions are
// not performance-critical.
/// The URI scheme, such as "http", or the empty string if the URI has no
/// explicit scheme.
std::string scheme() const;
/// Convenience function that returns true if the scheme() is "file"
bool is_file_scheme() const;
/// Whether the URI has an explicit host name. This may return true if
/// the URI has an empty host (e.g. "file:///tmp/foo"), while it returns
  /// false if the URI has no host component at all (e.g. "file:/tmp/foo").
bool has_host() const;
/// The URI host name, such as "localhost", "127.0.0.1" or "::1", or the empty
  /// string if the URI does not have a host component.
std::string host() const;
  /// The URI port number, as a string such as "80", or the empty string if the URI
/// does not have a port number component.
std::string port_text() const;
/// The URI port parsed as an integer, or -1 if the URI does not have a port
/// number component.
int32_t port() const;
/// The username specified in the URI.
std::string username() const;
/// The password specified in the URI.
std::string password() const;
/// The URI path component.
std::string path() const;
/// The URI query string
std::string query_string() const;
/// The URI query items
///
/// Note this API doesn't allow differentiating between an empty value
  /// and a missing value, as in "a&b=1" vs. "a=&b=1".
Result<std::vector<std::pair<std::string, std::string>>> query_items() const;
/// Get the string representation of this URI.
const std::string& ToString() const;
/// Factory function to parse a URI from its string representation.
Status Parse(const std::string& uri_string);
private:
struct Impl;
std::unique_ptr<Impl> impl_;
};
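// A minimal usage sketch (error handling elided; the URI string is illustrative):
//
//   Uri uri;
//   ARROW_RETURN_NOT_OK(uri.Parse("s3://bucket:9000/key?region=us-east-1"));
//   uri.scheme();        // "s3"
//   uri.host();          // "bucket"
//   uri.port();          // 9000
//   uri.path();          // "/key"
//   uri.query_string();  // "region=us-east-1"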
/// Percent-encode the input string, for use e.g. as a URI query parameter.
///
/// This will escape directory separators, making this function unsuitable
/// for encoding URI paths directly. See UriFromAbsolutePath() instead.
ARROW_EXPORT
std::string UriEscape(std::string_view s);
ARROW_EXPORT
std::string UriUnescape(std::string_view s);
/// Encode a host for use within a URI, such as "localhost",
/// "127.0.0.1", or "[::1]".
ARROW_EXPORT
std::string UriEncodeHost(std::string_view host);
/// Whether the string is a syntactically valid URI scheme according to RFC 3986.
ARROW_EXPORT
bool IsValidUriScheme(std::string_view s);
/// Create a file URI from a given absolute path
ARROW_EXPORT
Result<std::string> UriFromAbsolutePath(std::string_view path);
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,53 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <cstring>
#include <string>
#include <string_view>
#include "arrow/type_fwd.h"
#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
namespace arrow {
namespace util {
// Convert a UTF8 string to a wstring (either UTF16 or UTF32, depending
// on the wchar_t width).
ARROW_EXPORT Result<std::wstring> UTF8ToWideString(const std::string& source);
// Similarly, convert a wstring to a UTF8 string.
ARROW_EXPORT Result<std::string> WideStringToUTF8(const std::wstring& source);
// This function needs to be called before doing UTF8 validation.
ARROW_EXPORT void InitializeUTF8();
ARROW_EXPORT bool ValidateUTF8(const uint8_t* data, int64_t size);
ARROW_EXPORT bool ValidateUTF8(const std::string_view& str);
// Skip UTF8 byte order mark, if any.
ARROW_EXPORT
Result<const uint8_t*> SkipUTF8BOM(const uint8_t* data, int64_t size);
static constexpr uint32_t kMaxUnicodeCodepoint = 0x110000;
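// A minimal usage sketch:
//
//   InitializeUTF8();                         // must run once before ValidateUTF8
//   bool ok = ValidateUTF8("caf\xc3\xa9");    // true: well-formed UTF-8
//   ok = ValidateUTF8("\xff\xfe");            // false: bytes 0xFF/0xFE never appear in UTF-8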
} // namespace util
} // namespace arrow

View File

@@ -0,0 +1,928 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This is a private header for string-to-number parsing utilities
#pragma once
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <type_traits>
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/config.h"
#include "arrow/util/macros.h"
#include "arrow/util/time.h"
#include "arrow/util/visibility.h"
#include "arrow/vendored/datetime.h"
#include "arrow/vendored/strptime.h"
namespace arrow {
/// \brief A virtual string to timestamp parser
class ARROW_EXPORT TimestampParser {
public:
virtual ~TimestampParser() = default;
virtual bool operator()(const char* s, size_t length, TimeUnit::type out_unit,
int64_t* out,
bool* out_zone_offset_present = NULLPTR) const = 0;
virtual const char* kind() const = 0;
virtual const char* format() const;
/// \brief Create a TimestampParser that recognizes strptime-like format strings
static std::shared_ptr<TimestampParser> MakeStrptime(std::string format);
/// \brief Create a TimestampParser that recognizes (locale-agnostic) ISO8601
/// timestamps
static std::shared_ptr<TimestampParser> MakeISO8601();
};
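// A minimal usage sketch:
//
//   auto parser = TimestampParser::MakeISO8601();
//   int64_t value = 0;
//   bool ok = (*parser)("2023-04-27 20:29:30", 19, TimeUnit::SECOND, &value);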
namespace internal {
/// \brief The entry point for conversion from strings.
///
/// Specializations of StringConverter for `ARROW_TYPE` must define:
/// - A default constructible member type `value_type` which will be yielded on a
/// successful parse.
/// - A member function `Convert`, callable with the signature
/// `(const ARROW_TYPE& t, const char* s, size_t length, value_type* out)`.
/// `Convert` returns truthy for successful parses and assigns the parsed values to
/// `*out`. Parameters required for parsing (for example a timestamp's TimeUnit)
/// are acquired from the type parameter `t`.
template <typename ARROW_TYPE, typename Enable = void>
struct StringConverter;
template <typename T>
struct is_parseable {
template <typename U, typename = typename StringConverter<U>::value_type>
static std::true_type Test(U*);
template <typename U>
static std::false_type Test(...);
static constexpr bool value = decltype(Test<T>(NULLPTR))::value;
};
template <typename T, typename R = void>
using enable_if_parseable = enable_if_t<is_parseable<T>::value, R>;
template <>
struct StringConverter<BooleanType> {
using value_type = bool;
bool Convert(const BooleanType&, const char* s, size_t length, value_type* out) {
if (length == 1) {
// "0" or "1"?
if (s[0] == '0') {
*out = false;
return true;
}
if (s[0] == '1') {
*out = true;
return true;
}
return false;
}
if (length == 4) {
// "true"?
*out = true;
return ((s[0] == 't' || s[0] == 'T') && (s[1] == 'r' || s[1] == 'R') &&
(s[2] == 'u' || s[2] == 'U') && (s[3] == 'e' || s[3] == 'E'));
}
if (length == 5) {
// "false"?
*out = false;
return ((s[0] == 'f' || s[0] == 'F') && (s[1] == 'a' || s[1] == 'A') &&
(s[2] == 'l' || s[2] == 'L') && (s[3] == 's' || s[3] == 'S') &&
(s[4] == 'e' || s[4] == 'E'));
}
return false;
}
};
// Ideas for faster float parsing:
// - http://rapidjson.org/md_doc_internals.html#ParsingDouble
// - https://github.com/google/double-conversion [used here]
// - https://github.com/achan001/dtoa-fast
ARROW_EXPORT
bool StringToFloat(const char* s, size_t length, char decimal_point, float* out);
ARROW_EXPORT
bool StringToFloat(const char* s, size_t length, char decimal_point, double* out);
template <>
struct StringConverter<FloatType> {
using value_type = float;
explicit StringConverter(char decimal_point = '.') : decimal_point(decimal_point) {}
bool Convert(const FloatType&, const char* s, size_t length, value_type* out) {
return ARROW_PREDICT_TRUE(StringToFloat(s, length, decimal_point, out));
}
private:
const char decimal_point;
};
template <>
struct StringConverter<DoubleType> {
using value_type = double;
explicit StringConverter(char decimal_point = '.') : decimal_point(decimal_point) {}
bool Convert(const DoubleType&, const char* s, size_t length, value_type* out) {
return ARROW_PREDICT_TRUE(StringToFloat(s, length, decimal_point, out));
}
private:
const char decimal_point;
};
// NOTE: HalfFloatType would require a half<->float conversion library
inline uint8_t ParseDecimalDigit(char c) { return static_cast<uint8_t>(c - '0'); }
#define PARSE_UNSIGNED_ITERATION(C_TYPE) \
if (length > 0) { \
uint8_t digit = ParseDecimalDigit(*s++); \
result = static_cast<C_TYPE>(result * 10U); \
length--; \
if (ARROW_PREDICT_FALSE(digit > 9U)) { \
/* Non-digit */ \
return false; \
} \
result = static_cast<C_TYPE>(result + digit); \
} else { \
break; \
}
#define PARSE_UNSIGNED_ITERATION_LAST(C_TYPE) \
if (length > 0) { \
if (ARROW_PREDICT_FALSE(result > std::numeric_limits<C_TYPE>::max() / 10U)) { \
/* Overflow */ \
return false; \
} \
uint8_t digit = ParseDecimalDigit(*s++); \
result = static_cast<C_TYPE>(result * 10U); \
C_TYPE new_result = static_cast<C_TYPE>(result + digit); \
if (ARROW_PREDICT_FALSE(--length > 0)) { \
/* Too many digits */ \
return false; \
} \
if (ARROW_PREDICT_FALSE(digit > 9U)) { \
/* Non-digit */ \
return false; \
} \
if (ARROW_PREDICT_FALSE(new_result < result)) { \
/* Overflow */ \
return false; \
} \
result = new_result; \
}
inline bool ParseUnsigned(const char* s, size_t length, uint8_t* out) {
uint8_t result = 0;
do {
PARSE_UNSIGNED_ITERATION(uint8_t);
PARSE_UNSIGNED_ITERATION(uint8_t);
PARSE_UNSIGNED_ITERATION_LAST(uint8_t);
} while (false);
*out = result;
return true;
}
inline bool ParseUnsigned(const char* s, size_t length, uint16_t* out) {
uint16_t result = 0;
do {
PARSE_UNSIGNED_ITERATION(uint16_t);
PARSE_UNSIGNED_ITERATION(uint16_t);
PARSE_UNSIGNED_ITERATION(uint16_t);
PARSE_UNSIGNED_ITERATION(uint16_t);
PARSE_UNSIGNED_ITERATION_LAST(uint16_t);
} while (false);
*out = result;
return true;
}
inline bool ParseUnsigned(const char* s, size_t length, uint32_t* out) {
uint32_t result = 0;
do {
PARSE_UNSIGNED_ITERATION(uint32_t);
PARSE_UNSIGNED_ITERATION(uint32_t);
PARSE_UNSIGNED_ITERATION(uint32_t);
PARSE_UNSIGNED_ITERATION(uint32_t);
PARSE_UNSIGNED_ITERATION(uint32_t);
PARSE_UNSIGNED_ITERATION(uint32_t);
PARSE_UNSIGNED_ITERATION(uint32_t);
PARSE_UNSIGNED_ITERATION(uint32_t);
PARSE_UNSIGNED_ITERATION(uint32_t);
PARSE_UNSIGNED_ITERATION_LAST(uint32_t);
} while (false);
*out = result;
return true;
}
inline bool ParseUnsigned(const char* s, size_t length, uint64_t* out) {
uint64_t result = 0;
do {
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION(uint64_t);
PARSE_UNSIGNED_ITERATION_LAST(uint64_t);
} while (false);
*out = result;
return true;
}
#undef PARSE_UNSIGNED_ITERATION
#undef PARSE_UNSIGNED_ITERATION_LAST
template <typename T>
bool ParseHex(const char* s, size_t length, T* out) {
  // Let's make sure that the length of the string is not too big
if (!ARROW_PREDICT_TRUE(sizeof(T) * 2 >= length && length > 0)) {
return false;
}
T result = 0;
for (size_t i = 0; i < length; i++) {
result = static_cast<T>(result << 4);
if (s[i] >= '0' && s[i] <= '9') {
result = static_cast<T>(result | (s[i] - '0'));
} else if (s[i] >= 'A' && s[i] <= 'F') {
result = static_cast<T>(result | (s[i] - 'A' + 10));
} else if (s[i] >= 'a' && s[i] <= 'f') {
result = static_cast<T>(result | (s[i] - 'a' + 10));
} else {
/* Non-digit */
return false;
}
}
*out = result;
return true;
}
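// Example: ParseHex("beef", 4, &value) sets a uint16_t value to 0xBEEF and returns true;
// it returns false for an empty string, a string longer than 2 * sizeof(T), or a non-hex digit.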
template <class ARROW_TYPE>
struct StringToUnsignedIntConverterMixin {
using value_type = typename ARROW_TYPE::c_type;
bool Convert(const ARROW_TYPE&, const char* s, size_t length, value_type* out) {
if (ARROW_PREDICT_FALSE(length == 0)) {
return false;
}
    // If it starts with 0x then it's hex
if (length > 2 && s[0] == '0' && ((s[1] == 'x') || (s[1] == 'X'))) {
length -= 2;
s += 2;
return ARROW_PREDICT_TRUE(ParseHex(s, length, out));
}
// Skip leading zeros
while (length > 0 && *s == '0') {
length--;
s++;
}
return ParseUnsigned(s, length, out);
}
};
template <>
struct StringConverter<UInt8Type> : public StringToUnsignedIntConverterMixin<UInt8Type> {
using StringToUnsignedIntConverterMixin<UInt8Type>::StringToUnsignedIntConverterMixin;
};
template <>
struct StringConverter<UInt16Type>
: public StringToUnsignedIntConverterMixin<UInt16Type> {
using StringToUnsignedIntConverterMixin<UInt16Type>::StringToUnsignedIntConverterMixin;
};
template <>
struct StringConverter<UInt32Type>
: public StringToUnsignedIntConverterMixin<UInt32Type> {
using StringToUnsignedIntConverterMixin<UInt32Type>::StringToUnsignedIntConverterMixin;
};
template <>
struct StringConverter<UInt64Type>
: public StringToUnsignedIntConverterMixin<UInt64Type> {
using StringToUnsignedIntConverterMixin<UInt64Type>::StringToUnsignedIntConverterMixin;
};
template <class ARROW_TYPE>
struct StringToSignedIntConverterMixin {
using value_type = typename ARROW_TYPE::c_type;
using unsigned_type = typename std::make_unsigned<value_type>::type;
bool Convert(const ARROW_TYPE&, const char* s, size_t length, value_type* out) {
static constexpr auto max_positive =
static_cast<unsigned_type>(std::numeric_limits<value_type>::max());
// Assuming two's complement
static constexpr unsigned_type max_negative = max_positive + 1;
bool negative = false;
unsigned_type unsigned_value = 0;
if (ARROW_PREDICT_FALSE(length == 0)) {
return false;
}
    // If it starts with 0x then it's hex
if (length > 2 && s[0] == '0' && ((s[1] == 'x') || (s[1] == 'X'))) {
length -= 2;
s += 2;
if (!ARROW_PREDICT_TRUE(ParseHex(s, length, &unsigned_value))) {
return false;
}
*out = static_cast<value_type>(unsigned_value);
return true;
}
if (*s == '-') {
negative = true;
s++;
if (--length == 0) {
return false;
}
}
// Skip leading zeros
while (length > 0 && *s == '0') {
length--;
s++;
}
if (!ARROW_PREDICT_TRUE(ParseUnsigned(s, length, &unsigned_value))) {
return false;
}
if (negative) {
if (ARROW_PREDICT_FALSE(unsigned_value > max_negative)) {
return false;
}
// To avoid both compiler warnings (with unsigned negation)
// and undefined behaviour (with signed negation overflow),
// use the expanded formula for 2's complement negation.
*out = static_cast<value_type>(~unsigned_value + 1);
} else {
if (ARROW_PREDICT_FALSE(unsigned_value > max_positive)) {
return false;
}
*out = static_cast<value_type>(unsigned_value);
}
return true;
}
};
template <>
struct StringConverter<Int8Type> : public StringToSignedIntConverterMixin<Int8Type> {
using StringToSignedIntConverterMixin<Int8Type>::StringToSignedIntConverterMixin;
};
template <>
struct StringConverter<Int16Type> : public StringToSignedIntConverterMixin<Int16Type> {
using StringToSignedIntConverterMixin<Int16Type>::StringToSignedIntConverterMixin;
};
template <>
struct StringConverter<Int32Type> : public StringToSignedIntConverterMixin<Int32Type> {
using StringToSignedIntConverterMixin<Int32Type>::StringToSignedIntConverterMixin;
};
template <>
struct StringConverter<Int64Type> : public StringToSignedIntConverterMixin<Int64Type> {
using StringToSignedIntConverterMixin<Int64Type>::StringToSignedIntConverterMixin;
};
namespace detail {
// Inline-able ISO-8601 parser
using ts_type = TimestampType::c_type;
template <typename Duration>
static inline bool ParseYYYY_MM_DD(const char* s, Duration* since_epoch) {
uint16_t year = 0;
uint8_t month = 0;
uint8_t day = 0;
if (ARROW_PREDICT_FALSE(s[4] != '-') || ARROW_PREDICT_FALSE(s[7] != '-')) {
return false;
}
if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 4, &year))) {
return false;
}
if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 5, 2, &month))) {
return false;
}
if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 8, 2, &day))) {
return false;
}
arrow_vendored::date::year_month_day ymd{arrow_vendored::date::year{year},
arrow_vendored::date::month{month},
arrow_vendored::date::day{day}};
if (ARROW_PREDICT_FALSE(!ymd.ok())) return false;
*since_epoch = std::chrono::duration_cast<Duration>(
arrow_vendored::date::sys_days{ymd}.time_since_epoch());
return true;
}
template <typename Duration>
static inline bool ParseHH(const char* s, Duration* out) {
uint8_t hours = 0;
if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
return false;
}
if (ARROW_PREDICT_FALSE(hours >= 24)) {
return false;
}
*out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours));
return true;
}
template <typename Duration>
static inline bool ParseHH_MM(const char* s, Duration* out) {
uint8_t hours = 0;
uint8_t minutes = 0;
if (ARROW_PREDICT_FALSE(s[2] != ':')) {
return false;
}
if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
return false;
}
if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 3, 2, &minutes))) {
return false;
}
if (ARROW_PREDICT_FALSE(hours >= 24)) {
return false;
}
if (ARROW_PREDICT_FALSE(minutes >= 60)) {
return false;
}
*out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours) +
std::chrono::minutes(minutes));
return true;
}
template <typename Duration>
static inline bool ParseHHMM(const char* s, Duration* out) {
uint8_t hours = 0;
uint8_t minutes = 0;
if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
return false;
}
if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 2, 2, &minutes))) {
return false;
}
if (ARROW_PREDICT_FALSE(hours >= 24)) {
return false;
}
if (ARROW_PREDICT_FALSE(minutes >= 60)) {
return false;
}
*out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours) +
std::chrono::minutes(minutes));
return true;
}
template <typename Duration>
static inline bool ParseHH_MM_SS(const char* s, Duration* out) {
uint8_t hours = 0;
uint8_t minutes = 0;
uint8_t seconds = 0;
if (ARROW_PREDICT_FALSE(s[2] != ':') || ARROW_PREDICT_FALSE(s[5] != ':')) {
return false;
}
if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 0, 2, &hours))) {
return false;
}
if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 3, 2, &minutes))) {
return false;
}
if (ARROW_PREDICT_FALSE(!ParseUnsigned(s + 6, 2, &seconds))) {
return false;
}
if (ARROW_PREDICT_FALSE(hours >= 24)) {
return false;
}
if (ARROW_PREDICT_FALSE(minutes >= 60)) {
return false;
}
if (ARROW_PREDICT_FALSE(seconds >= 60)) {
return false;
}
*out = std::chrono::duration_cast<Duration>(std::chrono::hours(hours) +
std::chrono::minutes(minutes) +
std::chrono::seconds(seconds));
return true;
}
static inline bool ParseSubSeconds(const char* s, size_t length, TimeUnit::type unit,
uint32_t* out) {
// The decimal point has been peeled off at this point
// Fail if number of decimal places provided exceeds what the unit can hold.
// Calculate how many trailing decimal places are omitted for the unit
// e.g. if 4 decimal places are provided and unit is MICRO, 2 are missing
size_t omitted = 0;
switch (unit) {
case TimeUnit::MILLI:
if (ARROW_PREDICT_FALSE(length > 3)) {
return false;
}
if (length < 3) {
omitted = 3 - length;
}
break;
case TimeUnit::MICRO:
if (ARROW_PREDICT_FALSE(length > 6)) {
return false;
}
if (length < 6) {
omitted = 6 - length;
}
break;
case TimeUnit::NANO:
if (ARROW_PREDICT_FALSE(length > 9)) {
return false;
}
if (length < 9) {
omitted = 9 - length;
}
break;
default:
return false;
}
if (ARROW_PREDICT_TRUE(omitted == 0)) {
return ParseUnsigned(s, length, out);
} else {
uint32_t subseconds = 0;
bool success = ParseUnsigned(s, length, &subseconds);
if (ARROW_PREDICT_TRUE(success)) {
switch (omitted) {
case 1:
*out = subseconds * 10;
break;
case 2:
*out = subseconds * 100;
break;
case 3:
*out = subseconds * 1000;
break;
case 4:
*out = subseconds * 10000;
break;
case 5:
*out = subseconds * 100000;
break;
case 6:
*out = subseconds * 1000000;
break;
case 7:
*out = subseconds * 10000000;
break;
case 8:
*out = subseconds * 100000000;
break;
default:
// Impossible case
break;
}
return true;
} else {
return false;
}
}
}
} // namespace detail
static inline bool ParseTimestampISO8601(const char* s, size_t length,
TimeUnit::type unit, TimestampType::c_type* out,
bool* out_zone_offset_present = NULLPTR) {
using seconds_type = std::chrono::duration<TimestampType::c_type>;
// We allow the following zone offset formats:
// - (none)
// - Z
// - [+-]HH(:?MM)?
//
// We allow the following formats for all units:
// - "YYYY-MM-DD"
// - "YYYY-MM-DD[ T]hhZ?"
// - "YYYY-MM-DD[ T]hh:mmZ?"
// - "YYYY-MM-DD[ T]hh:mm:ssZ?"
//
// We allow the following formats for unit == MILLI, MICRO, or NANO:
// - "YYYY-MM-DD[ T]hh:mm:ss.s{1,3}Z?"
//
// We allow the following formats for unit == MICRO, or NANO:
// - "YYYY-MM-DD[ T]hh:mm:ss.s{4,6}Z?"
//
// We allow the following formats for unit == NANO:
// - "YYYY-MM-DD[ T]hh:mm:ss.s{7,9}Z?"
//
// UTC is always assumed, and the DataType's timezone is ignored.
//
if (ARROW_PREDICT_FALSE(length < 10)) return false;
seconds_type seconds_since_epoch;
if (ARROW_PREDICT_FALSE(!detail::ParseYYYY_MM_DD(s, &seconds_since_epoch))) {
return false;
}
if (length == 10) {
*out = util::CastSecondsToUnit(unit, seconds_since_epoch.count());
return true;
}
if (ARROW_PREDICT_FALSE(s[10] != ' ') && ARROW_PREDICT_FALSE(s[10] != 'T')) {
return false;
}
if (out_zone_offset_present) {
*out_zone_offset_present = false;
}
seconds_type zone_offset(0);
if (s[length - 1] == 'Z') {
--length;
if (out_zone_offset_present) *out_zone_offset_present = true;
} else if (s[length - 3] == '+' || s[length - 3] == '-') {
// [+-]HH
length -= 3;
if (ARROW_PREDICT_FALSE(!detail::ParseHH(s + length + 1, &zone_offset))) {
return false;
}
if (s[length] == '+') zone_offset *= -1;
if (out_zone_offset_present) *out_zone_offset_present = true;
} else if (s[length - 5] == '+' || s[length - 5] == '-') {
// [+-]HHMM
length -= 5;
if (ARROW_PREDICT_FALSE(!detail::ParseHHMM(s + length + 1, &zone_offset))) {
return false;
}
if (s[length] == '+') zone_offset *= -1;
if (out_zone_offset_present) *out_zone_offset_present = true;
} else if ((s[length - 6] == '+' || s[length - 6] == '-') && (s[length - 3] == ':')) {
// [+-]HH:MM
length -= 6;
if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s + length + 1, &zone_offset))) {
return false;
}
if (s[length] == '+') zone_offset *= -1;
if (out_zone_offset_present) *out_zone_offset_present = true;
}
seconds_type seconds_since_midnight;
switch (length) {
case 13: // YYYY-MM-DD[ T]hh
if (ARROW_PREDICT_FALSE(!detail::ParseHH(s + 11, &seconds_since_midnight))) {
return false;
}
break;
case 16: // YYYY-MM-DD[ T]hh:mm
if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s + 11, &seconds_since_midnight))) {
return false;
}
break;
case 19: // YYYY-MM-DD[ T]hh:mm:ss
case 21: // YYYY-MM-DD[ T]hh:mm:ss.s
case 22: // YYYY-MM-DD[ T]hh:mm:ss.ss
case 23: // YYYY-MM-DD[ T]hh:mm:ss.sss
case 24: // YYYY-MM-DD[ T]hh:mm:ss.ssss
case 25: // YYYY-MM-DD[ T]hh:mm:ss.sssss
case 26: // YYYY-MM-DD[ T]hh:mm:ss.ssssss
case 27: // YYYY-MM-DD[ T]hh:mm:ss.sssssss
case 28: // YYYY-MM-DD[ T]hh:mm:ss.ssssssss
case 29: // YYYY-MM-DD[ T]hh:mm:ss.sssssssss
if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s + 11, &seconds_since_midnight))) {
return false;
}
break;
default:
return false;
}
seconds_since_epoch += seconds_since_midnight;
seconds_since_epoch += zone_offset;
if (length <= 19) {
*out = util::CastSecondsToUnit(unit, seconds_since_epoch.count());
return true;
}
if (ARROW_PREDICT_FALSE(s[19] != '.')) {
return false;
}
uint32_t subseconds = 0;
if (ARROW_PREDICT_FALSE(
!detail::ParseSubSeconds(s + 20, length - 20, unit, &subseconds))) {
return false;
}
*out = util::CastSecondsToUnit(unit, seconds_since_epoch.count()) + subseconds;
return true;
}
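// A minimal usage sketch:
//
//   TimestampType::c_type out = 0;
//   bool ok = ParseTimestampISO8601("2023-04-27T20:29:30.123", 23, TimeUnit::MILLI, &out);
//   // ok == true; out holds milliseconds since the UNIX epoch (UTC is assumed)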
#if defined(_WIN32) || defined(ARROW_WITH_MUSL)
static constexpr bool kStrptimeSupportsZone = false;
#else
static constexpr bool kStrptimeSupportsZone = true;
#endif
/// \brief Returns time since the UNIX epoch in the requested unit
static inline bool ParseTimestampStrptime(const char* buf, size_t length,
const char* format, bool ignore_time_in_day,
bool allow_trailing_chars, TimeUnit::type unit,
int64_t* out) {
// NOTE: strptime() is more than 10x faster than arrow_vendored::date::parse().
// The buffer may not be nul-terminated
std::string clean_copy(buf, length);
struct tm result;
memset(&result, 0, sizeof(struct tm));
#ifdef _WIN32
char* ret = arrow_strptime(clean_copy.c_str(), format, &result);
#else
char* ret = strptime(clean_copy.c_str(), format, &result);
#endif
if (ret == NULLPTR) {
return false;
}
if (!allow_trailing_chars && static_cast<size_t>(ret - clean_copy.c_str()) != length) {
return false;
}
// ignore the time part
arrow_vendored::date::sys_seconds secs =
arrow_vendored::date::sys_days(arrow_vendored::date::year(result.tm_year + 1900) /
(result.tm_mon + 1) / result.tm_mday);
if (!ignore_time_in_day) {
secs += (std::chrono::hours(result.tm_hour) + std::chrono::minutes(result.tm_min) +
std::chrono::seconds(result.tm_sec));
#ifndef _WIN32
secs -= std::chrono::seconds(result.tm_gmtoff);
#endif
}
*out = util::CastSecondsToUnit(unit, secs.time_since_epoch().count());
return true;
}
template <>
struct StringConverter<TimestampType> {
using value_type = int64_t;
bool Convert(const TimestampType& type, const char* s, size_t length, value_type* out) {
return ParseTimestampISO8601(s, length, type.unit(), out);
}
};
template <>
struct StringConverter<DurationType>
: public StringToSignedIntConverterMixin<DurationType> {
using StringToSignedIntConverterMixin<DurationType>::StringToSignedIntConverterMixin;
};
template <typename DATE_TYPE>
struct StringConverter<DATE_TYPE, enable_if_date<DATE_TYPE>> {
using value_type = typename DATE_TYPE::c_type;
using duration_type =
typename std::conditional<std::is_same<DATE_TYPE, Date32Type>::value,
arrow_vendored::date::days,
std::chrono::milliseconds>::type;
bool Convert(const DATE_TYPE& type, const char* s, size_t length, value_type* out) {
if (ARROW_PREDICT_FALSE(length != 10)) {
return false;
}
duration_type since_epoch;
if (ARROW_PREDICT_FALSE(!detail::ParseYYYY_MM_DD(s, &since_epoch))) {
return false;
}
*out = static_cast<value_type>(since_epoch.count());
return true;
}
};
template <typename TIME_TYPE>
struct StringConverter<TIME_TYPE, enable_if_time<TIME_TYPE>> {
using value_type = typename TIME_TYPE::c_type;
// We allow the following formats for all units:
// - "hh:mm"
// - "hh:mm:ss"
//
// We allow the following formats for unit == MILLI, MICRO, or NANO:
// - "hh:mm:ss.s{1,3}"
//
// We allow the following formats for unit == MICRO, or NANO:
// - "hh:mm:ss.s{4,6}"
//
// We allow the following formats for unit == NANO:
// - "hh:mm:ss.s{7,9}"
bool Convert(const TIME_TYPE& type, const char* s, size_t length, value_type* out) {
const auto unit = type.unit();
std::chrono::seconds since_midnight;
if (length == 5) {
if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM(s, &since_midnight))) {
return false;
}
*out =
static_cast<value_type>(util::CastSecondsToUnit(unit, since_midnight.count()));
return true;
}
if (ARROW_PREDICT_FALSE(length < 8)) {
return false;
}
if (ARROW_PREDICT_FALSE(!detail::ParseHH_MM_SS(s, &since_midnight))) {
return false;
}
*out = static_cast<value_type>(util::CastSecondsToUnit(unit, since_midnight.count()));
if (length == 8) {
return true;
}
if (ARROW_PREDICT_FALSE(s[8] != '.')) {
return false;
}
uint32_t subseconds_count = 0;
if (ARROW_PREDICT_FALSE(
!detail::ParseSubSeconds(s + 9, length - 9, unit, &subseconds_count))) {
return false;
}
*out += subseconds_count;
return true;
}
};
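// Illustrative sketch (not library code): parsing a millisecond-precision time of
// day with the converter above.
//
//   Time32Type time_type(TimeUnit::MILLI);
//   int32_t millis = 0;
//   bool ok = StringConverter<Time32Type>{}.Convert(time_type, "20:29:30.123", 12, &millis);
//   // on success, millis is the number of milliseconds since midnight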
/// \brief Convenience wrappers around internal::StringConverter.
template <typename T>
bool ParseValue(const T& type, const char* s, size_t length,
typename StringConverter<T>::value_type* out) {
return StringConverter<T>{}.Convert(type, s, length, out);
}
template <typename T>
enable_if_parameter_free<T, bool> ParseValue(
const char* s, size_t length, typename StringConverter<T>::value_type* out) {
static T type;
return StringConverter<T>{}.Convert(type, s, length, out);
}
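// Illustrative sketches of the wrappers above (not library code):
//
//   // Parameterized type: pass a type instance explicitly.
//   TimestampType ts_type(TimeUnit::MICRO);
//   int64_t ts = 0;
//   bool ok1 = ParseValue(ts_type, "2023-04-27T20:29:30", 19, &ts);
//
//   // Parameter-free type: the overload default-constructs the type.
//   int32_t value = 0;
//   bool ok2 = ParseValue<Int32Type>("12345", 5, &value);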
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,172 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <algorithm>
#include <utility>
#include <vector>
#include "arrow/result.h"
#include "arrow/util/algorithm.h"
#include "arrow/util/functional.h"
#include "arrow/util/logging.h"
namespace arrow {
namespace internal {
template <typename T>
std::vector<T> DeleteVectorElement(const std::vector<T>& values, size_t index) {
DCHECK(!values.empty());
DCHECK_LT(index, values.size());
std::vector<T> out;
out.reserve(values.size() - 1);
for (size_t i = 0; i < index; ++i) {
out.push_back(values[i]);
}
for (size_t i = index + 1; i < values.size(); ++i) {
out.push_back(values[i]);
}
return out;
}
template <typename T>
std::vector<T> AddVectorElement(const std::vector<T>& values, size_t index,
T new_element) {
DCHECK_LE(index, values.size());
std::vector<T> out;
out.reserve(values.size() + 1);
for (size_t i = 0; i < index; ++i) {
out.push_back(values[i]);
}
out.emplace_back(std::move(new_element));
for (size_t i = index; i < values.size(); ++i) {
out.push_back(values[i]);
}
return out;
}
template <typename T>
std::vector<T> ReplaceVectorElement(const std::vector<T>& values, size_t index,
T new_element) {
DCHECK_LE(index, values.size());
std::vector<T> out;
out.reserve(values.size());
for (size_t i = 0; i < index; ++i) {
out.push_back(values[i]);
}
out.emplace_back(std::move(new_element));
for (size_t i = index + 1; i < values.size(); ++i) {
out.push_back(values[i]);
}
return out;
}
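// Illustrative sketch of the copy-and-edit helpers above (not library code):
//
//   std::vector<int> v{1, 2, 3};
//   auto added    = AddVectorElement(v, 1, 9);          // {1, 9, 2, 3}
//   auto replaced = ReplaceVectorElement(added, 0, 7);  // {7, 9, 2, 3}
//   auto deleted  = DeleteVectorElement(replaced, 2);   // {7, 9, 3}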
template <typename T, typename Predicate>
std::vector<T> FilterVector(std::vector<T> values, Predicate&& predicate) {
auto new_end = std::remove_if(values.begin(), values.end(),
[&](const T& value) { return !predicate(value); });
values.erase(new_end, values.end());
return values;
}
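// Illustrative sketch: keep only the elements satisfying the predicate.
//
//   std::vector<int> evens =
//       FilterVector(std::vector<int>{1, 2, 3, 4}, [](int x) { return x % 2 == 0; });
//   // evens == {2, 4}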
template <typename Fn, typename From,
typename To = decltype(std::declval<Fn>()(std::declval<From>()))>
std::vector<To> MapVector(Fn&& map, const std::vector<From>& source) {
std::vector<To> out;
out.reserve(source.size());
std::transform(source.begin(), source.end(), std::back_inserter(out),
std::forward<Fn>(map));
return out;
}
template <typename Fn, typename From,
typename To = decltype(std::declval<Fn>()(std::declval<From>()))>
std::vector<To> MapVector(Fn&& map, std::vector<From>&& source) {
std::vector<To> out;
out.reserve(source.size());
std::transform(std::make_move_iterator(source.begin()),
std::make_move_iterator(source.end()), std::back_inserter(out),
std::forward<Fn>(map));
return out;
}
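// Illustrative sketch: apply a function to each element, collecting the results.
//
//   std::vector<int> lengths =
//       MapVector([](const std::string& s) { return static_cast<int>(s.size()); },
//                 std::vector<std::string>{"a", "bb", "ccc"});
//   // lengths == {1, 2, 3}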
/// \brief Like MapVector, but for functions that can fail (i.e. return Result);
/// the first error Status encountered is returned.
template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
typename To = typename internal::call_traits::return_type<Fn>::ValueType>
Result<std::vector<To>> MaybeMapVector(Fn&& map, const std::vector<From>& source) {
std::vector<To> out;
out.reserve(source.size());
ARROW_RETURN_NOT_OK(MaybeTransform(source.begin(), source.end(),
std::back_inserter(out), std::forward<Fn>(map)));
return std::move(out);
}
template <typename Fn, typename From = internal::call_traits::argument_type<0, Fn>,
typename To = typename internal::call_traits::return_type<Fn>::ValueType>
Result<std::vector<To>> MaybeMapVector(Fn&& map, std::vector<From>&& source) {
std::vector<To> out;
out.reserve(source.size());
ARROW_RETURN_NOT_OK(MaybeTransform(std::make_move_iterator(source.begin()),
std::make_move_iterator(source.end()),
std::back_inserter(out), std::forward<Fn>(map)));
return std::move(out);
}
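// Illustrative sketch (assumes a calling function returning Status or Result, so
// that ARROW_ASSIGN_OR_RAISE can be used):
//
//   auto parse = [](std::string s) -> Result<int> {
//     if (s.empty()) return Status::Invalid("empty string");
//     return std::stoi(s);
//   };
//   ARROW_ASSIGN_OR_RAISE(std::vector<int> parsed,
//                         MaybeMapVector(parse, std::vector<std::string>{"1", "2"}));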
template <typename T>
std::vector<T> FlattenVectors(const std::vector<std::vector<T>>& vecs) {
std::size_t sum = 0;
for (const auto& vec : vecs) {
sum += vec.size();
}
std::vector<T> out;
out.reserve(sum);
for (const auto& vec : vecs) {
out.insert(out.end(), vec.begin(), vec.end());
}
return out;
}
template <typename T>
Result<std::vector<T>> UnwrapOrRaise(std::vector<Result<T>>&& results) {
std::vector<T> out;
out.reserve(results.size());
auto end = std::make_move_iterator(results.end());
for (auto it = std::make_move_iterator(results.begin()); it != end; it++) {
if (!it->ok()) {
return it->status();
}
out.push_back(it->MoveValueUnsafe());
}
return std::move(out);
}
template <typename T>
Result<std::vector<T>> UnwrapOrRaise(const std::vector<Result<T>>& results) {
std::vector<T> out;
out.reserve(results.size());
for (const auto& result : results) {
if (!result.ok()) {
return result.status();
}
out.push_back(result.ValueUnsafe());
}
return std::move(out);
}
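// Illustrative sketch: collapse a vector of Results, propagating the first error.
//
//   std::vector<Result<int>> results;
//   results.emplace_back(1);
//   results.emplace_back(Status::Invalid("bad value"));
//   Result<std::vector<int>> unwrapped = UnwrapOrRaise(std::move(results));
//   // unwrapped.status() reports the Invalid error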
} // namespace internal
} // namespace arrow

View File

@@ -0,0 +1,83 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#if defined(_WIN32) || defined(__CYGWIN__)
// Windows
#if defined(_MSC_VER)
#pragma warning(disable : 4251)
#else
#pragma GCC diagnostic ignored "-Wattributes"
#endif
#if defined(__cplusplus) && defined(__GNUC__) && !defined(__clang__)
// Use C++ attribute syntax where possible to avoid GCC parser bug
// (https://stackoverflow.com/questions/57993818/gcc-how-to-combine-attribute-dllexport-and-nodiscard-in-a-struct-de)
#define ARROW_DLLEXPORT [[gnu::dllexport]]
#define ARROW_DLLIMPORT [[gnu::dllimport]]
#else
#define ARROW_DLLEXPORT __declspec(dllexport)
#define ARROW_DLLIMPORT __declspec(dllimport)
#endif
#ifdef ARROW_STATIC
#define ARROW_EXPORT
#define ARROW_FRIEND_EXPORT
#define ARROW_TEMPLATE_EXPORT
#elif defined(ARROW_EXPORTING)
#define ARROW_EXPORT ARROW_DLLEXPORT
// For some reason [[gnu::dllexport]] doesn't work well with friend declarations
#define ARROW_FRIEND_EXPORT __declspec(dllexport)
#define ARROW_TEMPLATE_EXPORT ARROW_DLLEXPORT
#else
#define ARROW_EXPORT ARROW_DLLIMPORT
#define ARROW_FRIEND_EXPORT __declspec(dllimport)
#define ARROW_TEMPLATE_EXPORT ARROW_DLLIMPORT
#endif
#define ARROW_NO_EXPORT
#define ARROW_FORCE_INLINE __forceinline
#else
// Non-Windows
#define ARROW_FORCE_INLINE
#if defined(__cplusplus) && (defined(__GNUC__) || defined(__clang__))
#ifndef ARROW_EXPORT
#define ARROW_EXPORT [[gnu::visibility("default")]]
#endif
#ifndef ARROW_NO_EXPORT
#define ARROW_NO_EXPORT [[gnu::visibility("hidden")]]
#endif
#else
// Not C++, or not gcc/clang
#ifndef ARROW_EXPORT
#define ARROW_EXPORT
#endif
#ifndef ARROW_NO_EXPORT
#define ARROW_NO_EXPORT
#endif
#endif
#define ARROW_FRIEND_EXPORT
#define ARROW_TEMPLATE_EXPORT
#endif // Non-Windows
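// Illustrative usage sketch (MyUtility is a hypothetical class, not part of Arrow):
//
//   class ARROW_EXPORT MyUtility {
//    public:
//     void DoWork();
//   };
//
// On Windows, ARROW_EXPORT expands to a dllexport attribute when ARROW_EXPORTING is
// defined, to dllimport otherwise, and to nothing for ARROW_STATIC builds; on other
// platforms it marks default symbol visibility.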

View File

@@ -0,0 +1,40 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifdef _WIN32
// Windows defines min and max macros that mess up std::min/max
#ifndef NOMINMAX
#define NOMINMAX
#endif
#define WIN32_LEAN_AND_MEAN
// Set Windows 7 as a conservative minimum for Apache Arrow
#if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x601
#undef _WIN32_WINNT
#endif
#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x601
#endif
#include <winsock2.h>
#include <windows.h>
#include "arrow/util/windows_fixup.h"
#endif // _WIN32

View File

@@ -0,0 +1,52 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This header needs to be included multiple times.
#ifdef _WIN32
#ifdef max
#undef max
#endif
#ifdef min
#undef min
#endif
// The Windows API defines *File macros (e.g. CreateFile) that resolve to either
// the *FileA or *FileW variant; undefine them so they don't clash with
// identically-named Arrow identifiers.
#ifdef CopyFile
#undef CopyFile
#endif
#ifdef CreateFile
#undef CreateFile
#endif
#ifdef DeleteFile
#undef DeleteFile
#endif
// Other annoying Windows macro definitions...
#ifdef IN
#undef IN
#endif
#ifdef OUT
#undef OUT
#endif
// Note that we can't undefine OPTIONAL, because it can be used in other
// Windows headers...
#endif // _WIN32