diff --git a/CHANGELOG.md b/CHANGELOG.md index 808d072936..d306e1a7d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,12 +7,13 @@ Please mark all change in change log and use the issue from GitHub ## Bug - \#2890 Fix the index size caculation in cache - \#2952 Fix the result merging of IVF_PQ IP -- \#2975 Fix config ut failed +- \#2975 Fix config UT failed ## Feature ## Improvement - \#2653 Improve IVF search performance when NQ and nProbe are both large +- \#2828 Let Faiss not to compile half float by default ## Task diff --git a/core/src/index/thirdparty/faiss/gpu/GpuIndexIVF.cu b/core/src/index/thirdparty/faiss/gpu/GpuIndexIVF.cu index 8e873c1914..130e95f866 100644 --- a/core/src/index/thirdparty/faiss/gpu/GpuIndexIVF.cu +++ b/core/src/index/thirdparty/faiss/gpu/GpuIndexIVF.cu @@ -27,6 +27,7 @@ GpuIndexIVF::GpuIndexIVF(GpuResources* resources, nlist(nlistIn), nprobe(1), quantizer(nullptr) { + init_(); // Only IP and L2 are supported for now diff --git a/core/src/index/thirdparty/faiss/gpu/GpuIndexIVFFlat.cu b/core/src/index/thirdparty/faiss/gpu/GpuIndexIVFFlat.cu index 6ca7c70ffb..e806ea49fa 100644 --- a/core/src/index/thirdparty/faiss/gpu/GpuIndexIVFFlat.cu +++ b/core/src/index/thirdparty/faiss/gpu/GpuIndexIVFFlat.cu @@ -261,3 +261,4 @@ GpuIndexIVFFlat::searchImpl_(int n, } } // namespace + diff --git a/core/src/index/thirdparty/faiss/gpu/GpuIndexIVFPQ.cu b/core/src/index/thirdparty/faiss/gpu/GpuIndexIVFPQ.cu index 254c0c4104..d6095a58e8 100644 --- a/core/src/index/thirdparty/faiss/gpu/GpuIndexIVFPQ.cu +++ b/core/src/index/thirdparty/faiss/gpu/GpuIndexIVFPQ.cu @@ -34,6 +34,10 @@ GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources, bitsPerCode_(0), reserveMemoryVecs_(0), index_(nullptr) { +#ifndef FAISS_USE_FLOAT16 + FAISS_ASSERT(!ivfpqConfig_.useFloat16LookupTables); +#endif + copyFrom(index); } @@ -55,6 +59,10 @@ GpuIndexIVFPQ::GpuIndexIVFPQ(GpuResources* resources, bitsPerCode_(bitsPerCode), reserveMemoryVecs_(0), index_(nullptr) { +#ifndef 
FAISS_USE_FLOAT16 + FAISS_ASSERT(!config.useFloat16LookupTables); +#endif + verifySettings_(); // We haven't trained ourselves, so don't construct the PQ index yet @@ -424,9 +432,11 @@ GpuIndexIVFPQ::verifySettings_() const { // We must have enough shared memory on the current device to store // our lookup distances int lookupTableSize = sizeof(float); +#ifdef FAISS_USE_FLOAT16 if (ivfpqConfig_.useFloat16LookupTables) { lookupTableSize = sizeof(half); } +#endif // 64 bytes per code is only supported with usage of float16, at 2^8 // codes per subquantizer diff --git a/core/src/index/thirdparty/faiss/gpu/impl/BroadcastSum.cu b/core/src/index/thirdparty/faiss/gpu/impl/BroadcastSum.cu index 364200c3e4..e9f7548e25 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/BroadcastSum.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/BroadcastSum.cu @@ -262,11 +262,13 @@ void runSumAlongColumns(Tensor& input, runSumAlongColumns(input, output, stream); } +#ifdef FAISS_USE_FLOAT16 void runSumAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream) { runSumAlongColumns(input, output, stream); } +#endif template void runAssignAlongColumns(Tensor& input, @@ -310,11 +312,13 @@ void runAssignAlongColumns(Tensor& input, runAssignAlongColumns(input, output, stream); } +#ifdef FAISS_USE_FLOAT16 void runAssignAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream) { runAssignAlongColumns(input, output, stream); } +#endif template void runSumAlongRows(Tensor& input, @@ -344,11 +348,13 @@ void runSumAlongRows(Tensor& input, runSumAlongRows(input, output, zeroClamp, stream); } +#ifdef FAISS_USE_FLOAT16 void runSumAlongRows(Tensor& input, Tensor& output, bool zeroClamp, cudaStream_t stream) { runSumAlongRows(input, output, zeroClamp, stream); } +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/impl/BroadcastSum.cuh b/core/src/index/thirdparty/faiss/gpu/impl/BroadcastSum.cuh index 8c4b27452c..6641aadd40 100644 --- 
a/core/src/index/thirdparty/faiss/gpu/impl/BroadcastSum.cuh +++ b/core/src/index/thirdparty/faiss/gpu/impl/BroadcastSum.cuh @@ -17,18 +17,22 @@ void runSumAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream); +#ifdef FAISS_USE_FLOAT16 void runSumAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream); +#endif // output[x][i] = input[i] for all x void runAssignAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream); +#ifdef FAISS_USE_FLOAT16 void runAssignAlongColumns(Tensor& input, Tensor& output, cudaStream_t stream); +#endif // output[i][x] += input[i] for all x // If zeroClamp, output[i][x] = max(output[i][x] + input[i], 0) for all x @@ -37,9 +41,11 @@ void runSumAlongRows(Tensor& input, bool zeroClamp, cudaStream_t stream); +#ifdef FAISS_USE_FLOAT16 void runSumAlongRows(Tensor& input, Tensor& output, bool zeroClamp, cudaStream_t stream); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/impl/Distance.cu b/core/src/index/thirdparty/faiss/gpu/impl/Distance.cu index e4aa3af1fc..0856396cc1 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/Distance.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/Distance.cu @@ -370,6 +370,7 @@ runIPDistance(GpuResources* resources, outIndices); } +#ifdef FAISS_USE_FLOAT16 void runIPDistance(GpuResources* resources, Tensor& vectors, @@ -390,6 +391,7 @@ runIPDistance(GpuResources* resources, outDistances, outIndices); } +#endif void runL2Distance(GpuResources* resources, @@ -416,6 +418,7 @@ runL2Distance(GpuResources* resources, ignoreOutDistances); } +#ifdef FAISS_USE_FLOAT16 void runL2Distance(GpuResources* resources, Tensor& vectors, @@ -440,5 +443,6 @@ runL2Distance(GpuResources* resources, outIndices, ignoreOutDistances); } +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/impl/Distance.cuh b/core/src/index/thirdparty/faiss/gpu/impl/Distance.cuh index 844d420aea..3430ddf87f 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/Distance.cuh +++ 
b/core/src/index/thirdparty/faiss/gpu/impl/Distance.cuh @@ -57,6 +57,7 @@ void runIPDistance(GpuResources* resources, Tensor& outDistances, Tensor& outIndices); + void runL2Distance(GpuResources* resources, Tensor& vectors, bool vectorsRowMajor, diff --git a/core/src/index/thirdparty/faiss/gpu/impl/FlatIndex.cu b/core/src/index/thirdparty/faiss/gpu/impl/FlatIndex.cu index e7545df767..29480fa84f 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/FlatIndex.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/FlatIndex.cu @@ -29,6 +29,9 @@ FlatIndex::FlatIndex(GpuResources* res, space_(space), num_(0), rawData_(space) { +#ifndef FAISS_USE_FLOAT16 + FAISS_ASSERT(!useFloat16_); +#endif } bool @@ -38,28 +41,40 @@ FlatIndex::getUseFloat16() const { /// Returns the number of vectors we contain int FlatIndex::getSize() const { +#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { return vectorsHalf_.getSize(0); } else { return vectors_.getSize(0); } +#else + return vectors_.getSize(0); +#endif } int FlatIndex::getDim() const { +#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { return vectorsHalf_.getSize(1); } else { return vectors_.getSize(1); } +#else + return vectors_.getSize(1); +#endif } void FlatIndex::reserve(size_t numVecs, cudaStream_t stream) { +#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { rawData_.reserve(numVecs * dim_ * sizeof(half), stream); } else { rawData_.reserve(numVecs * dim_ * sizeof(float), stream); } +#else + rawData_.reserve(numVecs * dim_ * sizeof(float), stream); +#endif } template <> @@ -70,6 +85,7 @@ FlatIndex::getVectorsRef() { return getVectorsFloat32Ref(); } +#ifdef FAISS_USE_FLOAT16 template <> Tensor& FlatIndex::getVectorsRef() { @@ -77,6 +93,7 @@ FlatIndex::getVectorsRef() { FAISS_ASSERT(useFloat16_); return getVectorsFloat16Ref(); } +#endif Tensor& FlatIndex::getVectorsFloat32Ref() { @@ -86,6 +103,7 @@ FlatIndex::getVectorsFloat32Ref() { return vectors_; } +#ifdef FAISS_USE_FLOAT16 Tensor& FlatIndex::getVectorsFloat16Ref() { // Should not call this unless 
we are in float16 mode @@ -93,6 +111,7 @@ FlatIndex::getVectorsFloat16Ref() { return vectorsHalf_; } +#endif DeviceTensor FlatIndex::getVectorsFloat32Copy(cudaStream_t stream) { @@ -103,12 +122,16 @@ DeviceTensor FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) { DeviceTensor vecFloat32({num, dim_}, space_); +#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { auto halfNarrow = vectorsHalf_.narrowOutermost(from, num); convertTensor(stream, halfNarrow, vecFloat32); } else { vectors_.copyTo(vecFloat32, stream); } +#else + vectors_.copyTo(vecFloat32, stream); +#endif return vecFloat32; } @@ -125,13 +148,16 @@ FlatIndex::query(Tensor& input, auto stream = resources_->getDefaultStreamCurrentDevice(); auto& mem = resources_->getMemoryManagerCurrentDevice(); +#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { // We need to convert the input to float16 for comparison to ourselves + auto inputHalf = convertTensor(resources_, stream, input); query(inputHalf, bitset, k, metric, metricArg, outDistances, outIndices, exactDistance); + } else { bfKnnOnDevice(resources_, getCurrentDevice(), @@ -149,8 +175,26 @@ FlatIndex::query(Tensor& input, outIndices, !exactDistance); } +#else + bfKnnOnDevice(resources_, + getCurrentDevice(), + stream, + storeTransposed_ ? vectorsTransposed_ : vectors_, + !storeTransposed_, // is vectors row major? 
+ &norms_, + input, + true, // input is row major + bitset, + k, + metric, + metricArg, + outDistances, + outIndices, + !exactDistance); +#endif } +#ifdef FAISS_USE_FLOAT16 void FlatIndex::query(Tensor& input, Tensor& bitset, @@ -178,11 +222,13 @@ FlatIndex::query(Tensor& input, outIndices, !exactDistance); } +#endif void FlatIndex::computeResidual(Tensor& vecs, Tensor& listIds, Tensor& residuals) { +#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { runCalcResidual(vecs, getVectorsFloat16Ref(), @@ -196,11 +242,19 @@ FlatIndex::computeResidual(Tensor& vecs, residuals, resources_->getDefaultStreamCurrentDevice()); } +#else + runCalcResidual(vecs, + getVectorsFloat32Ref(), + listIds, + residuals, + resources_->getDefaultStreamCurrentDevice()); +#endif } void FlatIndex::reconstruct(Tensor& listIds, Tensor& vecs) { +#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { runReconstruct(listIds, getVectorsFloat16Ref(), @@ -212,8 +266,13 @@ FlatIndex::reconstruct(Tensor& listIds, vecs, resources_->getDefaultStreamCurrentDevice()); } +#else + runReconstruct(listIds, + getVectorsFloat32Ref(), + vecs, + resources_->getDefaultStreamCurrentDevice()); +#endif } - void FlatIndex::reconstruct(Tensor& listIds, Tensor& vecs) { @@ -229,6 +288,7 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { return; } +#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { // Make sure that `data` is on our device; we'll run the // conversion on our device @@ -252,8 +312,15 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { true /* reserve exactly */); } +#else + rawData_.append((char*) data, + (size_t) dim_ * numVecs * sizeof(float), + stream, + true /* reserve exactly */); +#endif num_ += numVecs; +#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { DeviceTensor vectorsHalf( (half*) rawData_.data(), {(int) num_, dim_}, space_); @@ -263,8 +330,14 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { (float*) rawData_.data(), {(int) num_, dim_}, space_); vectors_ = 
std::move(vectors); } +#else + DeviceTensor vectors( + (float*) rawData_.data(), {(int) num_, dim_}, space_); + vectors_ = std::move(vectors); +#endif if (storeTransposed_) { +#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { vectorsHalfTransposed_ = std::move(DeviceTensor({dim_, (int) num_}, space_)); @@ -274,9 +347,15 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { std::move(DeviceTensor({dim_, (int) num_}, space_)); runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream); } +#else + vectorsTransposed_ = + std::move(DeviceTensor({dim_, (int) num_}, space_)); + runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream); +#endif } // Precompute L2 norms of our database +#ifdef FAISS_USE_FLOAT16 if (useFloat16_) { DeviceTensor norms({(int) num_}, space_); runL2Norm(vectorsHalf_, true, norms, true, stream); @@ -286,6 +365,11 @@ FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) { runL2Norm(vectors_, true, norms, true, stream); norms_ = std::move(norms); } +#else + DeviceTensor norms({(int) num_}, space_); + runL2Norm(vectors_, true, norms, true, stream); + norms_ = std::move(norms); +#endif } void @@ -293,8 +377,10 @@ FlatIndex::reset() { rawData_.clear(); vectors_ = std::move(DeviceTensor()); vectorsTransposed_ = std::move(DeviceTensor()); +#ifdef FAISS_USE_FLOAT16 vectorsHalf_ = std::move(DeviceTensor()); vectorsHalfTransposed_ = std::move(DeviceTensor()); +#endif norms_ = std::move(DeviceTensor()); num_ = 0; } diff --git a/core/src/index/thirdparty/faiss/gpu/impl/FlatIndex.cuh b/core/src/index/thirdparty/faiss/gpu/impl/FlatIndex.cuh index 5bc97441c4..eef07df24c 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/FlatIndex.cuh +++ b/core/src/index/thirdparty/faiss/gpu/impl/FlatIndex.cuh @@ -47,7 +47,9 @@ class FlatIndex { Tensor& getVectorsFloat32Ref(); /// Returns a reference to our vectors currently in use (useFloat16 mode) +#ifdef FAISS_USE_FLOAT16 Tensor& getVectorsFloat16Ref(); +#endif /// Performs a copy of the 
vectors on the given device, converting /// as needed from float16 @@ -67,6 +69,7 @@ class FlatIndex { Tensor& outIndices, bool exactDistance); +#ifdef FAISS_USE_FLOAT16 void query(Tensor& vecs, Tensor& bitset, int k, @@ -75,6 +78,7 @@ class FlatIndex { Tensor& outDistances, Tensor& outIndices, bool exactDistance); +#endif /// Compute residual for set of vectors void computeResidual(Tensor& vecs, @@ -123,8 +127,10 @@ class FlatIndex { DeviceTensor vectorsTransposed_; /// Vectors currently in rawData_, float16 form +#ifdef FAISS_USE_FLOAT16 DeviceTensor vectorsHalf_; DeviceTensor vectorsHalfTransposed_; +#endif /// Precomputed L2 norms DeviceTensor norms_; diff --git a/core/src/index/thirdparty/faiss/gpu/impl/IVFPQ.cu b/core/src/index/thirdparty/faiss/gpu/impl/IVFPQ.cu index 01c91f9c3f..48254c1f5b 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/IVFPQ.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/IVFPQ.cu @@ -60,6 +60,10 @@ IVFPQ::IVFPQ(GpuResources* resources, FAISS_ASSERT(dim_ % numSubQuantizers_ == 0); FAISS_ASSERT(isSupportedPQCodeLength(bytesPerVector_)); +#ifndef FAISS_USE_FLOAT16 + FAISS_ASSERT(!useFloat16LookupTables_); +#endif + setPQCentroids_(pqCentroidData); } @@ -112,7 +116,9 @@ IVFPQ::setPrecomputedCodes(bool enable) { } else { // Clear out old precomputed code data precomputedCode_ = std::move(DeviceTensor()); +#ifdef FAISS_USE_FLOAT16 precomputedCodeHalf_ = std::move(DeviceTensor()); +#endif } } } @@ -156,6 +162,7 @@ IVFPQ::classifyAndAddVectors(Tensor& vecs, DeviceTensor residuals( mem, {vecs.getSize(0), vecs.getSize(1)}, stream); +#ifdef FAISS_USE_FLOAT16 if (quantizer_->getUseFloat16()) { auto& coarseCentroids = quantizer_->getVectorsFloat16Ref(); runCalcResidual(vecs, coarseCentroids, listIds, residuals, stream); @@ -163,6 +170,10 @@ IVFPQ::classifyAndAddVectors(Tensor& vecs, auto& coarseCentroids = quantizer_->getVectorsFloat32Ref(); runCalcResidual(vecs, coarseCentroids, listIds, residuals, stream); } +#else + auto& coarseCentroids = 
quantizer_->getVectorsFloat32Ref(); + runCalcResidual(vecs, coarseCentroids, listIds, residuals, stream); +#endif // Residuals are in the form // (vec x numSubQuantizer x dimPerSubQuantizer) @@ -519,6 +530,7 @@ IVFPQ::precomputeCodesT_() { // We added into the view, so `coarsePQProductTransposed` is now our // precomputed term 2. +#ifdef FAISS_USE_FLOAT16 if (useFloat16LookupTables_) { precomputedCodeHalf_ = convertTensor(resources_, @@ -527,15 +539,23 @@ IVFPQ::precomputeCodesT_() { } else { precomputedCode_ = std::move(coarsePQProductTransposed); } +#else + precomputedCode_ = std::move(coarsePQProductTransposed); +#endif + } void IVFPQ::precomputeCodes_() { +#ifdef FAISS_USE_FLOAT16 if (quantizer_->getUseFloat16()) { precomputeCodesT_(); } else { precomputeCodesT_(); } +#else + precomputeCodesT_(); +#endif } void @@ -678,6 +698,7 @@ IVFPQ::runPQPrecomputedCodes_( NoTypeTensor<3, true> term2; NoTypeTensor<3, true> term3; +#ifdef FAISS_USE_FLOAT16 DeviceTensor term3Half; if (useFloat16LookupTables_) { @@ -686,7 +707,10 @@ IVFPQ::runPQPrecomputedCodes_( term2 = NoTypeTensor<3, true>(precomputedCodeHalf_); term3 = NoTypeTensor<3, true>(term3Half); - } else { + } +#endif + + if (!useFloat16LookupTables_) { term2 = NoTypeTensor<3, true>(precomputedCode_); term3 = NoTypeTensor<3, true>(term3Transposed); } @@ -754,6 +778,7 @@ IVFPQ::runPQNoPrecomputedCodes_( int k, Tensor& outDistances, Tensor& outIndices) { +#ifdef FAISS_USE_FLOAT16 if (quantizer_->getUseFloat16()) { runPQNoPrecomputedCodesT_(queries, bitset, @@ -770,7 +795,17 @@ IVFPQ::runPQNoPrecomputedCodes_( k, outDistances, outIndices); - } + } +#else + runPQNoPrecomputedCodesT_(queries, + bitset, + coarseDistances, + coarseIndices, + k, + outDistances, + outIndices); +#endif + } } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/impl/IVFPQ.cuh b/core/src/index/thirdparty/faiss/gpu/impl/IVFPQ.cuh index db8cbb68aa..ad03fb4f89 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/IVFPQ.cuh +++ 
b/core/src/index/thirdparty/faiss/gpu/impl/IVFPQ.cuh @@ -153,7 +153,9 @@ class IVFPQ : public IVFBase { DeviceTensor precomputedCode_; /// Precomputed term 2 in half form +#ifdef FAISS_USE_FLOAT16 DeviceTensor precomputedCodeHalf_; +#endif }; } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/impl/L2Norm.cu b/core/src/index/thirdparty/faiss/gpu/impl/L2Norm.cu index 96bcd8e95b..bdf812524e 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/L2Norm.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/L2Norm.cu @@ -309,6 +309,7 @@ void runL2Norm(Tensor& input, } } +#ifdef FAISS_USE_FLOAT16 void runL2Norm(Tensor& input, bool inputRowMajor, Tensor& output, @@ -325,5 +326,6 @@ void runL2Norm(Tensor& input, inputCast, inputRowMajor, outputCast, normSquared, stream); } } +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/impl/L2Norm.cuh b/core/src/index/thirdparty/faiss/gpu/impl/L2Norm.cuh index c4d5850802..6df3dcea58 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/L2Norm.cuh +++ b/core/src/index/thirdparty/faiss/gpu/impl/L2Norm.cuh @@ -18,10 +18,12 @@ void runL2Norm(Tensor& input, bool normSquared, cudaStream_t stream); +#ifdef FAISS_USE_FLOAT16 void runL2Norm(Tensor& input, bool inputRowMajor, Tensor& output, bool normSquared, cudaStream_t stream); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/impl/PQCodeDistances-inl.cuh b/core/src/index/thirdparty/faiss/gpu/impl/PQCodeDistances-inl.cuh index c3ef87f2e7..520a8bcafb 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/PQCodeDistances-inl.cuh +++ b/core/src/index/thirdparty/faiss/gpu/impl/PQCodeDistances-inl.cuh @@ -438,6 +438,7 @@ runPQCodeDistancesMM(Tensor& pqCentroids, runSumAlongColumns(pqCentroidsNorm, outDistancesCodeViewCols, stream); +#ifdef FAISS_USE_FLOAT16 if (useFloat16Lookup) { // Need to convert back auto outCodeDistancesH = outCodeDistances.toTensor(); @@ -445,6 +446,7 @@ runPQCodeDistancesMM(Tensor& pqCentroids, outCodeDistancesF, 
outCodeDistancesH); } +#endif } template @@ -477,6 +479,7 @@ runPQCodeDistances(Tensor& pqCentroids, auto smem = (3 * dimsPerSubQuantizer) * sizeof(float) + topQueryToCentroid.getSize(1) * sizeof(int); +#ifdef FAISS_USE_FLOAT16 #define RUN_CODE(DIMS, L2) \ do { \ if (useFloat16Lookup) { \ @@ -495,6 +498,16 @@ runPQCodeDistances(Tensor& pqCentroids, topQueryToCentroid, outCodeDistancesT); \ } \ } while (0) +#else +#define RUN_CODE(DIMS, L2) \ + do { \ + auto outCodeDistancesT = outCodeDistances.toTensor(); \ + pqCodeDistances<<>>( \ + queries, kQueriesPerBlock, \ + coarseCentroids, pqCentroids, \ + topQueryToCentroid, outCodeDistancesT); \ + } while (0) +#endif #define CODE_L2(DIMS) \ do { \ diff --git a/core/src/index/thirdparty/faiss/gpu/impl/PQCodeDistances.cu b/core/src/index/thirdparty/faiss/gpu/impl/PQCodeDistances.cu index 817990b4a6..eec8852310 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/PQCodeDistances.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/PQCodeDistances.cu @@ -26,10 +26,12 @@ template struct Converter { }; +#ifdef FAISS_USE_FLOAT16 template <> struct Converter { inline static __device__ half to(float v) { return __float2half(v); } }; +#endif template <> struct Converter { @@ -394,6 +396,7 @@ runPQCodeDistancesMM(Tensor& pqCentroids, Tensor outCodeDistancesF; DeviceTensor outCodeDistancesFloatMem; +#ifdef FAISS_USE_FLOAT16 if (useFloat16Lookup) { outCodeDistancesFloatMem = DeviceTensor( mem, {outCodeDistances.getSize(0), @@ -406,6 +409,9 @@ runPQCodeDistancesMM(Tensor& pqCentroids, } else { outCodeDistancesF = outCodeDistances.toTensor(); } +#else + outCodeDistancesF = outCodeDistances.toTensor(); +#endif // Transpose -2(sub q)(q * c)(code) to -2(q * c)(sub q)(code) (which // is where we build our output distances) @@ -445,6 +451,7 @@ runPQCodeDistancesMM(Tensor& pqCentroids, runSumAlongColumns(pqCentroidsNorm, outDistancesCodeViewCols, stream); +#ifdef FAISS_USE_FLOAT16 if (useFloat16Lookup) { // Need to convert back auto 
outCodeDistancesH = outCodeDistances.toTensor(); @@ -452,6 +459,7 @@ runPQCodeDistances(Tensor& pqCentroids, outCodeDistancesF, outCodeDistancesH); } +#endif } void @@ -483,6 +491,7 @@ runPQCodeDistances(Tensor& pqCentroids, auto smem = (3 * dimsPerSubQuantizer) * sizeof(float) + topQueryToCentroid.getSize(1) * sizeof(int); +#ifdef FAISS_USE_FLOAT16 #define RUN_CODE(DIMS, L2) \ do { \ if (useFloat16Lookup) { \ @@ -492,7 +501,19 @@ runPQCodeDistances(Tensor& pqCentroids, queries, kQueriesPerBlock, \ coarseCentroids, pqCentroids, \ topQueryToCentroid, outCodeDistancesT); \ - } else { \ + } else { \ + auto outCodeDistancesT = outCodeDistances.toTensor(); \ + \ + pqCodeDistances<<>>( \ + queries, kQueriesPerBlock, \ + coarseCentroids, pqCentroids, \ + topQueryToCentroid, outCodeDistancesT); \ + } \ + } while (0) +#else +#define RUN_CODE(DIMS, L2) \ + do { \ + if(!useFloat16Lookup){ \ + auto outCodeDistancesT = outCodeDistances.toTensor(); \ + \ + pqCodeDistances<<>>( \ + queries, kQueriesPerBlock, \ + coarseCentroids, pqCentroids, \ + topQueryToCentroid, outCodeDistancesT); \ + } \ + } while (0) +#endif #define CODE_L2(DIMS) \ do { \ diff --git a/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassNoPrecomputed-inl.cuh b/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassNoPrecomputed-inl.cuh index ffc81b1f8c..a77e783d09 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassNoPrecomputed-inl.cuh +++ b/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassNoPrecomputed-inl.cuh @@ -275,7 +275,12 @@ runMultiPassTile(Tensor& queries, auto block = dim3(kThreadsPerBlock); // pq centroid distances - auto smem = useFloat16Lookup ? sizeof(half) : sizeof(float); + +#ifdef FAISS_USE_FLOAT16 + auto smem = useFloat16Lookup ? 
sizeof(half) : sizeof(float); +#else + auto smem = sizeof(float); +#endif smem *= numSubQuantizers * numSubQuantizerCodes; FAISS_ASSERT(smem <= getMaxSharedMemPerBlockCurrentDevice()); @@ -296,6 +301,7 @@ runMultiPassTile(Tensor& queries, allDistances); \ } while (0) +#ifdef FAISS_USE_FLOAT16 #define RUN_PQ(NUM_SUB_Q) \ do { \ if (useFloat16Lookup) { \ @@ -304,6 +310,12 @@ runMultiPassTile(Tensor& queries, RUN_PQ_OPT(NUM_SUB_Q, float, float4); \ } \ } while (0) +#else +#define RUN_PQ(NUM_SUB_Q) \ + do { \ + RUN_PQ_OPT(NUM_SUB_Q, float, float4); \ + } while (0) +#endif switch (bytesPerCode) { case 1: @@ -499,7 +511,12 @@ runPQScanMultiPassNoPrecomputed(Tensor& queries, sizeof(int), stream)); - int codeDistanceTypeSize = useFloat16Lookup ? sizeof(half) : sizeof(float); + int codeDistanceTypeSize = sizeof(float); +#ifdef FAISS_USE_FLOAT16 + if (useFloat16Lookup) { + codeDistanceTypeSize = sizeof(half); + } +#endif int totalCodeDistancesSize = queryTileSize * nprobe * numSubQuantizers * numSubQuantizerCodes * diff --git a/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassNoPrecomputed.cu b/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassNoPrecomputed.cu index ecf35fffdb..b4934382cb 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassNoPrecomputed.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassNoPrecomputed.cu @@ -248,8 +248,11 @@ runMultiPassTile(Tensor& queries, metric == MetricType::METRIC_L2); bool l2Distance = metric == MetricType::METRIC_L2; - // Calculate offset lengths, so we know where to write out +#ifndef FAISS_USE_FLOAT16 + FAISS_ASSERT(!useFloat16Lookup); +#endif + // Calculate offset lengths, so we know where to write out // intermediate results runCalcListOffsets(topQueryToCentroid, listLengths, prefixSumOffsets, thrustMem, stream); @@ -275,7 +278,13 @@ runMultiPassTile(Tensor& queries, auto block = dim3(kThreadsPerBlock); // pq centroid distances - auto smem = useFloat16Lookup ? 
sizeof(half) : sizeof(float); + //auto smem = useFloat16Lookup ? sizeof(half) : sizeof(float); + auto smem = sizeof(float); +#ifdef FAISS_USE_FLOAT16 + if (useFloat16Lookup) { + smem = sizeof(half); + } +#endif smem *= numSubQuantizers * numSubQuantizerCodes; FAISS_ASSERT(smem <= getMaxSharedMemPerBlockCurrentDevice()); @@ -296,6 +305,7 @@ runMultiPassTile(Tensor& queries, allDistances); \ } while (0) +#ifdef FAISS_USE_FLOAT16 #define RUN_PQ(NUM_SUB_Q) \ do { \ if (useFloat16Lookup) { \ @@ -304,6 +314,12 @@ runMultiPassTile(Tensor& queries, RUN_PQ_OPT(NUM_SUB_Q, float, float4); \ } \ } while (0) +#else +#define RUN_PQ(NUM_SUB_Q) \ + do { \ + RUN_PQ_OPT(NUM_SUB_Q, float, float4); \ + } while (0) +#endif // FAISS_USE_FLOAT16 switch (bytesPerCode) { case 1: @@ -497,7 +513,14 @@ void runPQScanMultiPassNoPrecomputed(Tensor& queries, sizeof(int), stream)); - int codeDistanceTypeSize = useFloat16Lookup ? sizeof(half) : sizeof(float); + int codeDistanceTypeSize = sizeof(float); +#ifdef FAISS_USE_FLOAT16 + if (useFloat16Lookup) { + codeDistanceTypeSize = sizeof(half); + } +#else + FAISS_ASSERT(!useFloat16Lookup); +#endif int totalCodeDistancesSize = queryTileSize * nprobe * numSubQuantizers * numSubQuantizerCodes * diff --git a/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassPrecomputed.cu b/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassPrecomputed.cu index 583ee477dc..02e65ff32a 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassPrecomputed.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/PQScanMultiPassPrecomputed.cu @@ -252,7 +252,12 @@ runMultiPassTile(Tensor& queries, auto block = dim3(kThreadsPerBlock); // pq precomputed terms (2 + 3) - auto smem = useFloat16Lookup ? 
sizeof(half) : sizeof(float); + auto smem = sizeof(float); +#ifdef FAISS_USE_FLOAT16 + if (useFloat16Lookup) { + smem = sizeof(half); + } +#endif smem *= numSubQuantizers * numSubQuantizerCodes; FAISS_ASSERT(smem <= getMaxSharedMemPerBlockCurrentDevice()); @@ -275,6 +280,7 @@ runMultiPassTile(Tensor& queries, allDistances); \ } while (0) +#ifdef FAISS_USE_FLOAT16 #define RUN_PQ(NUM_SUB_Q) \ do { \ if (useFloat16Lookup) { \ @@ -283,6 +289,12 @@ runMultiPassTile(Tensor& queries, RUN_PQ_OPT(NUM_SUB_Q, float, float4); \ } \ } while (0) +#else +#define RUN_PQ(NUM_SUB_Q) \ + do { \ + RUN_PQ_OPT(NUM_SUB_Q, float, float4); \ + } while (0) +#endif switch (bytesPerCode) { case 1: diff --git a/core/src/index/thirdparty/faiss/gpu/impl/VectorResidual.cu b/core/src/index/thirdparty/faiss/gpu/impl/VectorResidual.cu index 078e660417..980b3c3979 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/VectorResidual.cu +++ b/core/src/index/thirdparty/faiss/gpu/impl/VectorResidual.cu @@ -119,6 +119,7 @@ void runCalcResidual(Tensor& vecs, calcResidual(vecs, centroids, vecToCentroid, residuals, stream); } +#ifdef FAISS_USE_FLOAT16 void runCalcResidual(Tensor& vecs, Tensor& centroids, Tensor& vecToCentroid, @@ -126,6 +127,7 @@ void runCalcResidual(Tensor& vecs, cudaStream_t stream) { calcResidual(vecs, centroids, vecToCentroid, residuals, stream); } +#endif void runReconstruct(Tensor& listIds, Tensor& vecs, @@ -134,11 +136,13 @@ void runReconstruct(Tensor& listIds, gatherReconstruct(listIds, vecs, out, stream); } +#ifdef FAISS_USE_FLOAT16 void runReconstruct(Tensor& listIds, Tensor& vecs, Tensor& out, cudaStream_t stream) { gatherReconstruct(listIds, vecs, out, stream); } +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/impl/VectorResidual.cuh b/core/src/index/thirdparty/faiss/gpu/impl/VectorResidual.cuh index ca7bcaa0b6..8e8cd2e756 100644 --- a/core/src/index/thirdparty/faiss/gpu/impl/VectorResidual.cuh +++ 
b/core/src/index/thirdparty/faiss/gpu/impl/VectorResidual.cuh @@ -31,9 +31,11 @@ void runReconstruct(Tensor& listIds, Tensor& out, cudaStream_t stream); +#ifdef FAISS_USE_FLOAT16 void runReconstruct(Tensor& listIds, Tensor& vecs, Tensor& out, cudaStream_t stream); +# endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectHalf.cu b/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectHalf.cu index 4f642a0ca8..f6989fc084 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectHalf.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectHalf.cu @@ -10,6 +10,8 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 + // warp Q to thread Q: // 1, 1 // 32, 2 @@ -143,4 +145,6 @@ void runBlockSelectPair(Tensor& inK, } } +#endif // FAISS_USE_FLOAT16 + } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh b/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh index 238909d4b0..f787335cdf 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh +++ b/core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh @@ -241,6 +241,7 @@ void runBlockSelectPair(Tensor& inKeys, Tensor& outIndices, bool dir, int k, cudaStream_t stream); +#ifdef FAISS_USE_FLOAT16 void runBlockSelect(Tensor& in, Tensor& bitset, Tensor& outKeys, @@ -253,5 +254,6 @@ void runBlockSelectPair(Tensor& inKeys, Tensor& outKeys, Tensor& outIndices, bool dir, int k, cudaStream_t stream); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/ConversionOperators.cuh b/core/src/index/thirdparty/faiss/gpu/utils/ConversionOperators.cuh index ddc30af173..cf9b74c971 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/ConversionOperators.cuh +++ b/core/src/index/thirdparty/faiss/gpu/utils/ConversionOperators.cuh @@ -29,6 +29,7 @@ struct Convert { } }; +#ifdef FAISS_USE_FLOAT16 template <> struct Convert { inline __device__ half operator()(float v) const { @@ -42,6 
+43,7 @@ struct Convert { return __half2float(v); } }; +#endif template struct ConvertTo { @@ -50,38 +52,50 @@ struct ConvertTo { template <> struct ConvertTo { static inline __device__ float to(float v) { return v; } +#ifdef FAISS_USE_FLOAT16 static inline __device__ float to(half v) { return __half2float(v); } +#endif }; template <> struct ConvertTo { static inline __device__ float2 to(float2 v) { return v; } +#ifdef FAISS_USE_FLOAT16 static inline __device__ float2 to(half2 v) { return __half22float2(v); } +#endif }; template <> struct ConvertTo { static inline __device__ float4 to(float4 v) { return v; } +#ifdef FAISS_USE_FLOAT16 static inline __device__ float4 to(Half4 v) { return half4ToFloat4(v); } +#endif }; +#ifdef FAISS_USE_FLOAT16 template <> struct ConvertTo { static inline __device__ half to(float v) { return __float2half(v); } static inline __device__ half to(half v) { return v; } }; +#endif +#ifdef FAISS_USE_FLOAT16 template <> struct ConvertTo { static inline __device__ half2 to(float2 v) { return __float22half2_rn(v); } static inline __device__ half2 to(half2 v) { return v; } }; +#endif +#ifdef FAISS_USE_FLOAT16 template <> struct ConvertTo { static inline __device__ Half4 to(float4 v) { return float4ToHalf4(v); } static inline __device__ Half4 to(Half4 v) { return v; } }; +#endif // Tensor conversion template diff --git a/core/src/index/thirdparty/faiss/gpu/utils/Float16.cu b/core/src/index/thirdparty/faiss/gpu/utils/Float16.cu index 52d54df309..e1f5c09b9f 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/Float16.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/Float16.cu @@ -12,6 +12,8 @@ #include #include +#ifdef FAISS_USE_FLOAT16 + namespace faiss { namespace gpu { bool getDeviceSupportsFloat16Math(int device) { @@ -36,3 +38,5 @@ __half hostFloat2Half(float a) { } } } // namespace + +#endif // FAISS_USE_FLOAT16 diff --git a/core/src/index/thirdparty/faiss/gpu/utils/Float16.cuh b/core/src/index/thirdparty/faiss/gpu/utils/Float16.cuh index 
09566eaa94..0af798ba80 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/Float16.cuh +++ b/core/src/index/thirdparty/faiss/gpu/utils/Float16.cuh @@ -22,10 +22,14 @@ #define FAISS_USE_FULL_FLOAT16 1 #endif // __CUDA_ARCH__ types +#ifdef FAISS_USE_FLOAT16 #include +#endif namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 + // 64 bytes containing 4 half (float16) values struct Half4 { half2 a; @@ -72,4 +76,6 @@ bool getDeviceSupportsFloat16Math(int device); __half hostFloat2Half(float v); +#endif // FAISS_USE_FLOAT16 + } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/LoadStoreOperators.cuh b/core/src/index/thirdparty/faiss/gpu/utils/LoadStoreOperators.cuh index b0bb8b5330..b49d634461 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/LoadStoreOperators.cuh +++ b/core/src/index/thirdparty/faiss/gpu/utils/LoadStoreOperators.cuh @@ -35,6 +35,8 @@ struct LoadStore { } }; +#ifdef FAISS_USE_FLOAT16 + template <> struct LoadStore { static inline __device__ Half4 load(void* p) { @@ -87,4 +89,6 @@ struct LoadStore { } }; +#endif + } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/MathOperators.cuh b/core/src/index/thirdparty/faiss/gpu/utils/MathOperators.cuh index 68ccbd5686..7e9f25a2a0 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/MathOperators.cuh +++ b/core/src/index/thirdparty/faiss/gpu/utils/MathOperators.cuh @@ -217,6 +217,7 @@ struct Math { } }; +#ifdef FAISS_USE_FLOAT16 template <> struct Math { typedef half ScalarType; @@ -555,5 +556,6 @@ struct Math { return h; } }; +#endif // FAISS_USE_FLOAT16 } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/WarpSelectHalf.cu b/core/src/index/thirdparty/faiss/gpu/utils/WarpSelectHalf.cu index 54e10be1e5..d700ecaee7 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/WarpSelectHalf.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/WarpSelectHalf.cu @@ -10,6 +10,8 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 + // warp 
Q to thread Q: // 1, 1 // 32, 2 @@ -91,4 +93,6 @@ void runWarpSelect(Tensor& in, } } +#endif // FAISS_USE_FLOAT16 + } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/WarpSelectKernel.cuh b/core/src/index/thirdparty/faiss/gpu/utils/WarpSelectKernel.cuh index 3c122e8861..1b690b0306 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/WarpSelectKernel.cuh +++ b/core/src/index/thirdparty/faiss/gpu/utils/WarpSelectKernel.cuh @@ -62,9 +62,11 @@ void runWarpSelect(Tensor& in, Tensor& outIndices, bool dir, int k, cudaStream_t stream); +#ifdef FAISS_USE_FLOAT16 void runWarpSelect(Tensor& in, Tensor& outKeys, Tensor& outIndices, bool dir, int k, cudaStream_t stream); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/WarpShuffles.cuh b/core/src/index/thirdparty/faiss/gpu/utils/WarpShuffles.cuh index 504c73f79a..ec2e5b618c 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/WarpShuffles.cuh +++ b/core/src/index/thirdparty/faiss/gpu/utils/WarpShuffles.cuh @@ -91,6 +91,7 @@ inline __device__ T* shfl_xor(T* const val, return (T*) shfl_xor(v, laneMask, width); } +#ifdef FAISS_USE_FLOAT16 // CUDA 9.0+ has half shuffle #if CUDA_VERSION < 9000 inline __device__ half shfl(half v, @@ -113,5 +114,6 @@ inline __device__ half shfl_xor(half v, return h; } #endif // CUDA_VERSION +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf1.cu b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf1.cu index 88f1d21b57..d2525935c2 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf1.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf1.cu @@ -9,7 +9,9 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 1, 1); BLOCK_SELECT_IMPL(half, false, 1, 1); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf128.cu 
b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf128.cu index b38c00b83e..3759af9342 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf128.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf128.cu @@ -9,7 +9,9 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 128, 3); BLOCK_SELECT_IMPL(half, false, 128, 3); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf256.cu b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf256.cu index 2cea11ace2..a8a5cf13e9 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf256.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf256.cu @@ -9,7 +9,9 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 256, 4); BLOCK_SELECT_IMPL(half, false, 256, 4); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf32.cu b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf32.cu index 6045a52fea..18907c5119 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf32.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf32.cu @@ -9,7 +9,9 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 32, 2); BLOCK_SELECT_IMPL(half, false, 32, 2); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf64.cu b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf64.cu index ea4b0bf64b..81a9a84a9f 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf64.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalf64.cu @@ -9,7 +9,9 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 
BLOCK_SELECT_IMPL(half, true, 64, 3); BLOCK_SELECT_IMPL(half, false, 64, 3); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF1024.cu b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF1024.cu index 710e8c8460..e83b615193 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF1024.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF1024.cu @@ -9,6 +9,8 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, false, 1024, 8); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF2048.cu b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF2048.cu index 5f7f4d4f6b..e06c334481 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF2048.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF2048.cu @@ -11,7 +11,9 @@ namespace faiss { namespace gpu { #if GPU_MAX_SELECTION_K >= 2048 +#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, false, 2048, 8); #endif +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF512.cu b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF512.cu index 07ea1f9f6b..c1b67bd3de 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF512.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfF512.cu @@ -9,6 +9,8 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, false, 512, 8); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT1024.cu b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT1024.cu index 6dc37accf7..2fd0dffa37 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT1024.cu +++ 
b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT1024.cu @@ -9,6 +9,8 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 1024, 8); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT2048.cu b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT2048.cu index dd38b8d6a5..f91b6787e2 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT2048.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT2048.cu @@ -11,7 +11,9 @@ namespace faiss { namespace gpu { #if GPU_MAX_SELECTION_K >= 2048 +#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 2048, 8); #endif +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT512.cu b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT512.cu index ff2a9903fa..a2877db6ed 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT512.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/blockselect/BlockSelectHalfT512.cu @@ -9,6 +9,8 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 BLOCK_SELECT_IMPL(half, true, 512, 8); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf1.cu b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf1.cu index 79876207f7..da3206d454 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf1.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf1.cu @@ -9,7 +9,9 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 1, 1); WARP_SELECT_IMPL(half, false, 1, 1); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf128.cu b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf128.cu index 
150c9507da..8705e593c5 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf128.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf128.cu @@ -9,7 +9,9 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 128, 3); WARP_SELECT_IMPL(half, false, 128, 3); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf256.cu b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf256.cu index cd8b49b18f..a7af219582 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf256.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf256.cu @@ -9,7 +9,9 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 256, 4); WARP_SELECT_IMPL(half, false, 256, 4); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf32.cu b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf32.cu index ce1b7e4c74..d7ed389aec 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf32.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf32.cu @@ -9,7 +9,9 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 32, 2); WARP_SELECT_IMPL(half, false, 32, 2); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf64.cu b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf64.cu index 9d4311ec01..fea6c40b9c 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf64.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalf64.cu @@ -9,7 +9,9 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 64, 3); WARP_SELECT_IMPL(half, false, 64, 3); +#endif } } // namespace diff --git 
a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF1024.cu b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF1024.cu index 0241300141..d99eea9c7c 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF1024.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF1024.cu @@ -9,6 +9,8 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, false, 1024, 8); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF2048.cu b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF2048.cu index 1a16ee45c9..030d28e17f 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF2048.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF2048.cu @@ -11,7 +11,9 @@ namespace faiss { namespace gpu { #if GPU_MAX_SELECTION_K >= 2048 +#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, false, 2048, 8); #endif +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF512.cu b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF512.cu index 4cb138837b..651d727580 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF512.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfF512.cu @@ -9,6 +9,8 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, false, 512, 8); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT1024.cu b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT1024.cu index 6a95007ff8..5a576d7c48 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT1024.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT1024.cu @@ -9,6 +9,8 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 
WARP_SELECT_IMPL(half, true, 1024, 8); +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT2048.cu b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT2048.cu index 94586d0100..b5bd1f9e53 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT2048.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT2048.cu @@ -11,7 +11,9 @@ namespace faiss { namespace gpu { #if GPU_MAX_SELECTION_K >= 2048 +#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 2048, 8); #endif +#endif } } // namespace diff --git a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT512.cu b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT512.cu index 6ca08a16ab..21b8660273 100644 --- a/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT512.cu +++ b/core/src/index/thirdparty/faiss/gpu/utils/warpselect/WarpSelectHalfT512.cu @@ -9,6 +9,8 @@ namespace faiss { namespace gpu { +#ifdef FAISS_USE_FLOAT16 WARP_SELECT_IMPL(half, true, 512, 8); +#endif } } // namespace