JuliaPy · mkitti · Jul 7, 2025 · Jul 7, 2025 · Jul 7, 2025 · Jul 7, 2025
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,7 @@ dist/
 .CondaPkg/
 /jltest.*
 uv.lock
+
+# pixi environments
+.pixi
+*.egg-info
diff --git a/src/GIL/GIL.jl b/src/GIL/GIL.jl
@@ -9,44 +9,99 @@ module GIL
 
 using ..C: C
 
+# Ensure that only one Julia task tries to acquire the Python GIL.
+# Avoid the potential issue that a task could miscompute whether
+# it actually has the GIL simply because a different task that ran
+# on the same thread that once had the GIL.
+# https://github.com/JuliaPy/PythonCall.jl/issues/627
+const _jl_gil_lock = ReentrantLock()
+
+"""
+    hasgil()
+
+Returns `true` if the current thread has the GIL or `false` otherwise.
+"""
+hasgil() = C.PyGILState_Check() == Cint(1)
+
 """
-    lock(f)
+    lock(f; exclusive=true)
 
 Lock the GIL, compute `f()`, unlock the GIL, then return the result of `f()`.
 
 Use this to run Python code from threads that do not currently hold the GIL, such as new
 threads. Since the main Julia thread holds the GIL by default, you will need to
 [`unlock`](@ref) the GIL before using this function.
 
+Setting the `exclusive` keyword to `false` allows more than one Julia `Task`
+to attempt to acquire the GIL. This may be useful if one Julia `Task` calls
+code which releases the GIL, in which case another Julia task could acquire
+it. However, this is probably not a sound approach.
+
 See [`@lock`](@ref) for the macro form.
 """
-function lock(f)
-    state = C.PyGILState_Ensure()
+function lock(f; exclusive=true)
+    exclusive && Base.lock(_jl_gil_lock)
     try
-        f()
+        state = C.PyGILState_Ensure()
+        try
+            f()
+        finally
+            C.PyGILState_Release(state)
+        end
     finally
-        C.PyGILState_Release(state)
+        exclusive && Base.unlock(_jl_gil_lock)
     end
 end
 
 """
-    @lock expr
+    @lock [exclusive=true] expr
 
 Lock the GIL, compute `expr`, unlock the GIL, then return the result of `expr`.
 
 Use this to run Python code from threads that do not currently hold the GIL, such as new
 threads. Since the main Julia thread holds the GIL by default, you will need to
 [`@unlock`](@ref) the GIL before using this function.
 
+Setting the `exclusive` parameter to `false` allows more than one Julia `Task`
+to attempt to acquire the GIL. This may be useful if one Julia `Task` calls
+code which releases the GIL, in which case another Julia task could acquire
+it. However, this is probably not a sound approach.
+
 The macro equivalent of [`lock`](@ref).
 """
 macro lock(expr)
     quote
-        state = C.PyGILState_Ensure()
+        Base.lock(_jl_gil_lock)
         try
-            $(esc(expr))
+            state = C.PyGILState_Ensure()
+            try
+                $(esc(expr))
+            finally
+                C.PyGILState_Release(state)
+            end
         finally
-            C.PyGILState_Release(state)
+            Base.unlock(_jl_gil_lock)
+        end
+    end
+end
+
+macro lock(parameter, expr)
+    parameter.head == :(=) &&
+        parameter.args[1] == :exclusive ||
+        throw(ArgumentError("The only accepted parameter to @lock is `exclusive`."))
+
+    do_lock = esc(parameter.args[2])
+    quote
+        $do_lock && Base.lock(_jl_gil_lock)
+        try
+            state = C.PyGILState_Ensure()
+            try
+                $(esc(expr))
+            finally
+                C.PyGILState_Release(state)
+            end
+        finally
+            $do_lock && Base.unlock(_jl_gil_lock)
         end
     end
 end
@@ -63,11 +118,17 @@ Python code. That other thread can be a Julia thread, which must lock the GIL us
 See [`@unlock`](@ref) for the macro form.
 """
 function unlock(f)
-    state = C.PyEval_SaveThread()
+    _locked = Base.islocked(_jl_gil_lock)
+    _locked && Base.unlock(_jl_gil_lock)
     try
-        f()
+        state = C.PyEval_SaveThread()
+        try
+            f()
+        finally
+            C.PyEval_RestoreThread(state)
+        end
     finally
-        C.PyEval_RestoreThread(state)
+        _locked && Base.lock(_jl_gil_lock)
     end
 end
 
@@ -84,13 +145,31 @@ The macro equivalent of [`unlock`](@ref).
 """
 macro unlock(expr)
     quote
-        state = C.PyEval_SaveThread()
+        _locked = Base.islocked(_jl_gil_lock)
+        _locked && Base.unlock(_jl_gil_lock)
         try
-            $(esc(expr))
+            state = C.PyEval_SaveThread()
+            try
+                $(esc(expr))
+            finally
+                C.PyEval_RestoreThread(state)
+            end
         finally
-            C.PyEval_RestoreThread(state)
+            _locked && Base.lock(_jl_gil_lock)
         end
     end
 end
 
+#=
+# Disable this for now since holding this lock will disable finalizers
+# If the main thread already has the GIL, we should lock _jl_gil_lock.
+function __init__()
+    if hasgil()
+        Base.lock(_jl_gil_lock)
+    end
+end
+=#
+
+include("GlobalInterpreterLock.jl")
+
 end
diff --git a/src/GIL/GlobalInterpreterLock.jl b/src/GIL/GlobalInterpreterLock.jl
@@ -0,0 +1,120 @@
+struct TaskState
+    task::Task
+    sticky::Bool # original stickiness of the task
+    state::C.PyGILState_STATE
+end
+
+struct TaskStack
+    stack::Vector{TaskState}
+    count::IdDict{Task,Int}
+    condvar::Threads.Condition
+    function TaskStack()
+        return new(TaskState[], IdDict{Task,Int}(), Threads.Condition())
+    end
+end
+function Base.last(task_stack::TaskStack)::Task
+    return last(task_stack.stack).task
+end
+function Base.push!(task_stack::TaskStack, task::Task)
+    original_sticky = task.sticky
+    # The task should not migrate threads while acquiring or holding the GIL
+    task.sticky = true
+    gil_state = C.PyGILState_Ensure()
+
+    # Save the stickiness and state for when we release
+    state = TaskState(task, original_sticky, gil_state)
+    push!(task_stack.stack, state)
+
+    # Increment the count for this task
+    count = get(task_stack.count, task, 0)
+    task_stack.count[task] = count + 1
+
+    return task_stack
+end
+function Base.pop!(task_stack::TaskStack)::Task
+    state = pop!(task_stack.stack)
+    task = state.task
+    sticky = state.sticky
+    gil_state = state.state
+
+    # Decrement the count for this task
+    count = task_stack.count[task] - 1
+    if count == 0
+        # If 0, remove it from the key set
+        pop!(task_stack.count, task)
+    else
+        task_stack[task] = count
+    end
+
+    C.PyGILState_Release(gil_state)
+
+    # Restore sticky state after releasing the GIL
+    task.sticky = sticky
+
+    Base.lock(task_stack.condvar) do
+        notify(task_stack.condvar)
+    end
+
+    return task
+end
+Base.isempty(task_stack::TaskStack) = isempty(task_stack.stack)
+
+if !isdefined(Base, :OncePerThread)
+
+    # OncePerThread is implemented in full in Julia 1.12
+    # This implementation is meant for compatibility with Julia 1.10 and 1.11
+    # and only supports a static number of threads. Use Julia 1.12 for dynamic
+    # thread usage.
+    mutable struct OncePerThread{T,F} <: Function
+        @atomic xs::Vector{T} # values
+        @atomic ss::Vector{UInt8} # states: 0=initial, 1=hasrun, 2=error, 3==concurrent
+        const initializer::F
+        function OncePerThread{T,F}(initializer::F) where {T,F}
+            nt = Threads.maxthreadid()
+            return new{T,F}(Vector{T}(undef, nt), zeros(UInt8, nt), initializer)
+        end
+    end
+    OncePerThread{T}(initializer::Type{U}) where {T, U} = OncePerThread{T,Type{U}}(initializer)
+    (once::OncePerThread{T,F})() where {T,F} = once[Threads.threadid()]
+    function Base.getindex(once::OncePerThread, tid::Integer)
+        tid = Threads.threadid()
+        ss = @atomic :acquire once.ss
+        xs = @atomic :monotonic once.xs
+        if checkbounds(Bool, xs, tid)
+            if ss[tid] == 0
+                xs[tid] = once.initializer()
+                ss[tid] = 1
+            end
+            return xs[tid]
+        else
+            throw(ErrorException("Thread id $tid is out of bounds as initially allocated. Use Julia 1.12 for dynamic thread usage."))
+        end
+    end
+
+end
+
+struct GlobalInterpreterLock <: Base.AbstractLock
+    lock_owners::OncePerThread{TaskStack}
+    function GlobalInterpreterLock()
+        return new(OncePerThread{TaskStack}(TaskStack))
+    end
+end
+function Base.lock(gil::GlobalInterpreterLock)
+    push!(gil.lock_owners(), current_task())
+    return nothing
+end
+function Base.unlock(gil::GlobalInterpreterLock)
+    lock_owner::TaskStack = gil.lock_owners()
+    while last(lock_owner) != current_task()
+        wait(lock_owner.condvar)
+    end
+    task = pop!(lock_owner)
+    @assert task == current_task()
+    return nothing
+end
+function Base.islocked(gil::GlobalInterpreterLock)
+    # TODO: handle Julia 1.10 and 1.11 case when have not allocated up to maxthreadid
+    return any(!isempty(gil.lock_owners[thread_index]) for thread_index in 1:Threads.maxthreadid())
+end
+
+const _GIL = GlobalInterpreterLock()
diff --git a/test/GC.jl b/test/GC.jl
@@ -1,4 +1,7 @@
+using TestItemRunner
+
 @testitem "GC.gc()" begin
+    using PythonCall
     let
         pyobjs = map(pylist, 1:100)
         PythonCall.GIL.@unlock Threads.@threads for obj in pyobjs
@@ -13,6 +16,7 @@
 end
 
 @testitem "GC.GCHook" begin
+    using PythonCall
     let
         pyobjs = map(pylist, 1:100)
         PythonCall.GIL.@unlock Threads.@threads for obj in pyobjs
@@ -23,5 +27,11 @@ end
         VERSION >= v"1.10.0-" &&
         @test !isempty(PythonCall.GC.QUEUE.items)
     GC.gc()
+
+    # Unlock and relocking the ReentrantLock allows this test to pass
+    # if _jl_gil_lock is locked on init
+    # Base.unlock(PythonCall.GIL._jl_gil_lock)
+    # Base.lock(PythonCall.GIL._jl_gil_lock)
+
     @test isempty(PythonCall.GC.QUEUE.items)
 end
diff --git a/test/GIL.jl b/test/GIL.jl
@@ -4,7 +4,7 @@
     function threaded_sleep()
         PythonCall.GIL.unlock() do
             Threads.@threads for i = 1:2
-                PythonCall.GIL.lock() do
+                PythonCall.GIL.lock(exclusive=false) do
                     pyimport("time").sleep(1)
                 end
             end
@@ -18,14 +18,22 @@
     if Threads.nthreads() ≥ 2
         @test 0.9 < t.time < 1.2
     end
+
+    @test PythonCall.GIL.hasgil()
+    PythonCall.GIL.unlock() do
+        @test !Base.islocked(PythonCall.GIL._jl_gil_lock)
+        PythonCall.GIL.lock() do
+            @test Base.islocked(PythonCall.GIL._jl_gil_lock)
+        end
+    end
 end
 
 @testitem "@unlock and @lock" begin
     # This calls Python's time.sleep(1) twice concurrently. Since sleep() unlocks the
     # GIL, these can happen in parallel if Julia has at least 2 threads.
     function threaded_sleep()
         PythonCall.GIL.@unlock Threads.@threads for i = 1:2
-            PythonCall.GIL.@lock pyimport("time").sleep(1)
+            PythonCall.GIL.@lock exclusive=false pyimport("time").sleep(1)
         end
     end
     # one run to ensure it's compiled
@@ -36,4 +44,13 @@ end
     if Threads.nthreads() ≥ 2
         @test 0.9 < t.time < 1.2
     end
+
+    @test PythonCall.GIL.hasgil()
+    PythonCall.GIL.@unlock begin
+        @test !Base.islocked(PythonCall.GIL._jl_gil_lock)
+        PythonCall.GIL.@lock begin
+            @test Base.islocked(PythonCall.GIL._jl_gil_lock)
+        end
+    end
+
 end