diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index 79cd4c6..2952274 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -11,7 +11,7 @@ jobs:
       max-parallel: 12
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: [3.8, 3.9, "3.10", "3.11", "3.12"]
+        python-version: [3.9, "3.10", "3.11", "3.12", "3.13", "3.14"]
     steps:
     - uses: actions/checkout@v3
     - name: Checkout submodules
diff --git a/pyproject.toml b/pyproject.toml
index 9977bb5..481bc76 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,14 +16,13 @@ classifiers=[
   "Environment :: Console",
   "Intended Audience :: Developers",
   "Intended Audience :: Science/Research",
-  "License :: OSI Approved :: MIT License",
   "Operating System :: OS Independent",
   "Programming Language :: Python :: 3",
   "Topic :: Scientific/Engineering",
   "Topic :: Multimedia :: Sound/Audio",
 ]
 keywords=["samplerate", "converter", "signal processing", "audio"]
-license = {text = "MIT"}
+license = "MIT"
 dependencies = ["numpy"]
 
 [tool.setuptools.dynamic]
diff --git a/src/samplerate.cpp b/src/samplerate.cpp
index 42bb4d3..485cd66 100644
--- a/src/samplerate.cpp
+++ b/src/samplerate.cpp
@@ -40,6 +40,9 @@
 #define VERSION_INFO "nightly"
 #endif
 
+// This value was empirically and somewhat arbitrarily chosen; increase it for further safety.
+#define END_OF_INPUT_EXTRA_OUTPUT_FRAMES 10000
+
 namespace py = pybind11;
 using namespace pybind11::literals;
 
@@ -158,8 +161,15 @@ class Resampler {
     if (channels != _channels || channels == 0)
       throw std::domain_error("Invalid number of channels in input data.");
 
+    // Add a "fudge factor" to the size. This is because the actual number of
+    // output samples generated on the last call when input is terminated can
+    // be more than the expected number of output samples during mid-stream
+    // steady-state processing. (Also, when the stream is started, the number
+    // of output samples generated will generally be zero or otherwise less
+    // than the number of samples in mid-stream processing.)
     const auto new_size =
-        static_cast<size_t>(std::ceil(inbuf.shape[0] * sr_ratio));
+        static_cast<size_t>(std::ceil(inbuf.shape[0] * sr_ratio))
+        + END_OF_INPUT_EXTRA_OUTPUT_FRAMES;
 
     // allocate output array
     std::vector<size_t> out_shape{new_size};
@@ -179,12 +189,23 @@ class Resampler {
         sr_ratio       // src_ratio, sampling rate conversion ratio
     };
 
-    error_handler(src_process(_state, &src_data));
+    // Release GIL for the entire resampling operation
+    int err_code;
+    long output_frames_gen;
+    {
+      py::gil_scoped_release release;
+      err_code = src_process(_state, &src_data);
+      output_frames_gen = src_data.output_frames_gen;
+    }
+    error_handler(err_code);
 
     // create a shorter view of the array
-    if ((size_t)src_data.output_frames_gen < new_size) {
-      out_shape[0] = src_data.output_frames_gen;
+    if ((size_t)output_frames_gen < new_size) {
+      out_shape[0] = output_frames_gen;
       output.resize(out_shape);
+    } else if ((size_t)output_frames_gen >= new_size) {
+      // This means our fudge factor is too small.
+      throw std::runtime_error("Generated more output samples than expected!");
     }
 
     return output;
@@ -211,6 +232,7 @@ class CallbackResampler {
   callback_t _callback = nullptr;
   np_array_f32 _current_buffer;
   size_t _buffer_ndim = 0;
+  std::string _callback_error_msg = "";
 
  public:
   double _ratio = 0.0;
@@ -259,6 +281,7 @@ class CallbackResampler {
         _callback(r._callback),
         _current_buffer(std::move(r._current_buffer)),
         _buffer_ndim(r._buffer_ndim),
+        _callback_error_msg(std::move(r._callback_error_msg)),
         _ratio(r._ratio),
         _converter_type(r._converter_type),
         _channels(r._channels) {
@@ -274,6 +297,11 @@ class CallbackResampler {
 
   void set_buffer(const np_array_f32 &new_buf) { _current_buffer = new_buf; }
   size_t get_channels() { return _channels; }
+  void set_callback_error(const std::string &error_msg) {
+    _callback_error_msg = error_msg;
+  }
+  std::string get_callback_error() const { return _callback_error_msg; }
+  void clear_callback_error() { _callback_error_msg = ""; }
 
   np_array_f32 callback(void) {
     auto input = _callback();
@@ -293,13 +321,32 @@ class CallbackResampler {
 
     if (_state == nullptr) _create();
 
-    // read from the callback
-    size_t output_frames_gen = src_callback_read(
-        _state, _ratio, (long)frames, static_cast<float *>(outbuf.ptr));
+    // clear any previous callback error
+    clear_callback_error();
+
+    // read from the callback - note: GIL is managed by the_callback_func
+    // which acquires it only when calling the Python callback
+    size_t output_frames_gen = 0;
+    int err_code = 0;
+    {
+      py::gil_scoped_release release;
+      output_frames_gen = src_callback_read(_state, _ratio, (long)frames,
+                                            static_cast<float *>(outbuf.ptr));
+      // Get error code while GIL is released
+      if (output_frames_gen == 0) {
+        err_code = src_error(_state);
+      }
+    }
+
+    // check if callback had an error
+    std::string callback_error = get_callback_error();
+    if (!callback_error.empty()) {
+      throw std::domain_error(callback_error);
+    }
 
     // check error status
     if (output_frames_gen == 0) {
-      error_handler(src_error(_state));
+      error_handler(err_code);
     }
 
     // if there is only one channel and the input array had only on dimension
@@ -340,11 +387,14 @@ long the_callback_func(void *cb_data, float **data) {
   CallbackResampler *cb = static_cast<CallbackResampler *>(cb_data);
   int cb_channels = cb->get_channels();
 
-  // get the data as a numpy array
-  auto input = cb->callback();
+  py::buffer_info inbuf;
+  {
+    py::gil_scoped_acquire acquire;
 
-  // accessors for the arrays
-  py::buffer_info inbuf = input.request();
+    // get the data as a numpy array
+    auto input = cb->callback();
+    inbuf = input.request();
+  }
 
   // end of stream is signaled by a None, which is cast to a ndarray with ndim
   // == 0
@@ -354,11 +404,17 @@ long the_callback_func(void *cb_data, float **data) {
   int channels = 1;
   if (inbuf.ndim == 2)
     channels = inbuf.shape[1];
-  else if (inbuf.ndim > 2)
-    throw std::domain_error("Input array should have at most 2 dimensions");
+  else if (inbuf.ndim > 2) {
+    // Cannot throw exception in C callback - store error and return 0
+    cb->set_callback_error("Input array should have at most 2 dimensions");
+    return 0;
+  }
 
-  if (channels != cb_channels || channels == 0)
-    throw std::domain_error("Invalid number of channels in input data.");
+  if (channels != cb_channels || channels == 0) {
+    // Cannot throw exception in C callback - store error and return 0
+    cb->set_callback_error("Invalid number of channels in input data.");
+    return 0;
+  }
 
   *data = static_cast<float *>(inbuf.ptr);
 
@@ -386,8 +442,12 @@ py::array_t<float, py::array::c_style> resample(
   if (channels == 0)
     throw std::domain_error("Invalid number of channels (0) in input data.");
 
+  // Add buffer space to match Resampler.process() behavior with end_of_input=True
+  // src_simple internally behaves like end_of_input=True, so it may generate
+  // extra samples from buffer flushing, especially for certain converters
   const auto new_size =
-      static_cast<size_t>(std::ceil(inbuf.shape[0] * sr_ratio));
+      static_cast<size_t>(std::ceil(inbuf.shape[0] * sr_ratio))
+      + END_OF_INPUT_EXTRA_OUTPUT_FRAMES;
 
   // allocate output array
   std::vector<size_t> out_shape{new_size};
@@ -407,20 +467,31 @@ py::array_t<float, py::array::c_style> resample(
       sr_ratio  // src_ratio, sampling rate conversion ratio
   };
 
-  int ret_code = src_simple(&src_data, converter_type_int, channels);
-
-  error_handler(ret_code);
+  // Release GIL for the entire resampling operation
+  int err_code;
+  long output_frames_gen;
+  long input_frames_used;
+  {
+    py::gil_scoped_release release;
+    err_code = src_simple(&src_data, converter_type_int, channels);
+    output_frames_gen = src_data.output_frames_gen;
+    input_frames_used = src_data.input_frames_used;
+  }
+  error_handler(err_code);
 
   // create a shorter view of the array
-  if ((size_t)src_data.output_frames_gen < new_size) {
-    out_shape[0] = src_data.output_frames_gen;
+  if ((size_t)output_frames_gen < new_size) {
+    out_shape[0] = output_frames_gen;
     output.resize(out_shape);
+  } else if ((size_t)output_frames_gen >= new_size) {
+    // This means our fudge factor is too small.
+    throw std::runtime_error("Generated more output samples than expected!");
   }
 
   if (verbose) {
     py::print("samplerate info:");
-    py::print(src_data.input_frames_used, " input frames used");
-    py::print(src_data.output_frames_gen, " output frames generated");
+    py::print(input_frames_used, " input frames used");
+    py::print(output_frames_gen, " output frames generated");
   }
 
   return output;