diff --git a/README.md b/README.md
index d4fde35..c00339e 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
 Learning to Optimize (LearningToOptimize) package that provides basic
 functionalities to help fit proxy models for parametric optimization problems.
 
-Have a look at our sister [HugginFace Organization](https://huggingface.co/LearningToOptimize), for datasets, pre-trained models and benchmarks.
+Have a look at our sister [HuggingFace Organization](https://huggingface.co/LearningToOptimize), for datasets, pre-trained models and benchmarks.
 
 [![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://andrewrosemberg.github.io/LearningToOptimize.jl/stable/)
 [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://andrewrosemberg.github.io/LearningToOptimize.jl/dev/)
diff --git a/src/FullyConnected.jl b/src/FullyConnected.jl
index 4478b6b..95e6a90 100644
--- a/src/FullyConnected.jl
+++ b/src/FullyConnected.jl
@@ -157,7 +157,7 @@ function train!(model, loss, opt_state, X, Y; _batchsize = 32, shuffle = true)
     Y = Y |> gpu
     data = Flux.DataLoader((X, Y), batchsize = batchsize, shuffle = shuffle)
     for d in data
-        ∇model, _ = gradient(model, d...) do m, x, y # calculate the gradients
+        ∇model, _ = Flux.gradient(model, d...) do m, x, y # calculate the gradients
             loss(m(x), y)
         end
         # insert what ever code you want here that needs gradient
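
For context: the second hunk qualifies the bare `gradient` call as `Flux.gradient`, so the training loop no longer depends on Zygote's `gradient` being brought into scope by `using Flux`. Below is a minimal, self-contained sketch of the same explicit-gradient training pattern; the toy `Dense` model, the random data, and the `Flux.mse` loss are illustrative stand-ins, not code from this repository.

```julia
using Flux

# Hypothetical stand-ins, just to exercise the pattern from the patched train!.
model = Flux.Dense(4 => 1)
X = rand(Float32, 4, 32)
Y = rand(Float32, 1, 32)
opt_state = Flux.setup(Flux.Adam(1e-3), model)

data = Flux.DataLoader((X, Y), batchsize = 8, shuffle = true)
for d in data
    # Qualified call, as in the diff: take gradients w.r.t. the model only;
    # the gradients w.r.t. the data batch are discarded via `_`.
    ∇model, _ = Flux.gradient(model, d...) do m, x, y
        Flux.mse(m(x), y)
    end
    Flux.update!(opt_state, model, ∇model)
end
```

The `Flux.update!` step is not visible in the hunk and is included here only to make the sketch runnable end to end.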