enhance: add glog sink to transfer cgo log into zap (#46721)

issue: #45640

- With async logging, C logs and Go logs have no ordering guarantee, and
the C log format is not consistent with the Go log format; so we disable
glog's own output and forward each log record to the Go side, where it is
handled by the async zap logger.
- Use CGO to route all C-side logging through Go, which guarantees the
ordering between C logs and Go logs.
- Also fix the metric name, add new metric to count the logging.
- TODO: once woodpecker uses the milvus logger, we can use a bigger
buffer for logging.

<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
- Core invariant: all C (glog) and Go logs must be routed through the
same zap async pipeline so ordering and formatting are preserved; this
PR ensures every glog emission is captured and forwarded to zap before
any async buffering diverges the outputs.

- Logic removed/simplified: direct glog outputs and hard
stdout/stderr/log_dir settings are disabled (configs/glog.conf and flags
in internal/core/src/config/ConfigKnowhere.cpp) because they are
redundant once a single zap sink handles all logs; logging metrics were
simplified from per-length/volatile gauges to totalized counters
(pkg/metrics/logging_metrics.go & pkg/log/*), removing duplicate
length-tracking and making accounting consistent.

- No data loss or behavior regression (concrete code paths): Google
logging now adds a GoZapSink (internal/core/src/common/logging_c.h,
logging_c.cpp) that calls the exported CGO bridge goZapLogExt
(internal/util/cgo/logging/logging.go). Go side uses
C.GoStringN/C.GoString to capture full message and file, maps glog
severities to zapcore levels, preserves caller info, and writes via the
existing zap async core (same write path used by Go logs). The C++
send() trims glog's trailing newline and forwards exact buffers/lengths,
so message content, file, line, and severity are preserved and
serialized through the same async writer—no log entries are dropped or
reordered relative to Go logs.

- Capability added (where it takes effect): a CGO bridge that forwards
glog into zap—new Go-exported function goZapLogExt
(internal/util/cgo/logging/logging.go), a GoZapSink in C++ that forwards
glog sends (internal/core/src/common/logging_c.h/.cpp), and blank
imports of the cgo initializer across multiple packages (various
internal/* files) to ensure the bridge is registered early so all C logs
are captured.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

Signed-off-by: chyezh <chyezh@outlook.com>
This commit is contained in:
Zhen Ye 2026-01-04 14:45:23 +08:00 committed by GitHub
parent c585fbac72
commit 27525d57cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 298 additions and 35 deletions

View File

@ -22,6 +22,7 @@ import (
"os"
"github.com/milvus-io/milvus/cmd/milvus"
_ "github.com/milvus-io/milvus/internal/util/cgo"
)
//export startEmbedded

View File

@ -1,10 +1,5 @@
# if true, only log to stdout
--logtostdout=true
--logtostderr=false
--alsologtostderr=false
# `INFO`, `WARNING`, `ERROR`, and `FATAL` are 0, 1, 2, and 3
--minloglevel=0
--log_dir=/var/lib/milvus/logs/
# using vlog to implement debug and trace log
# if set vmodule to 5, open debug level
# if set vmodule to 6, open trace level

View File

@ -25,6 +25,10 @@ add_definitions(-DELPP_THREAD_SAFE)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
message( STATUS "Building using CMake version: ${CMAKE_VERSION}" )
if ( BUILD_UNIT_TEST STREQUAL "ON" )
add_definitions(-DWITHOUT_GO_LOGGING)
endif()
if ( MILVUS_GPU_VERSION )
add_definitions(-DMILVUS_GPU_VERSION)
endif ()

View File

@ -0,0 +1,62 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "logging_c.h"
// goZapLogExt is the bridge that hands one glog record to the Go zap logger.
// Which form is visible here depends on the build:
//   - WITHOUT_GO_LOGGING: a local no-op stub, so the library links without Go.
//   - __APPLE__:          a weak-import declaration resolved by the Go binary.
//   - otherwise:          a plain extern declaration resolved by the Go binary.
#ifdef WITHOUT_GO_LOGGING
// Empty implementation when there's no go logging implementation.
void
goZapLogExt(
    int severity, const char* file, int line, const char* msg, int msg_len) {
}
#elif defined(__APPLE__)
// Go export function.
// will be implemented in github.com/milvus-io/milvus/internal/util/cgo/logging
// macOS linker requires weak_import to allow unresolved symbols.
// This must stay a declaration: giving it a body here would define the symbol
// on the C++ side and silently discard every log record instead of forwarding
// it to the Go implementation.
extern "C" void
goZapLogExt(int severity,
            const char* file,
            int line,
            const char* msg,
            int msg_len) __attribute__((weak_import));
#else
// Go export function.
// will be implemented in github.com/milvus-io/milvus/internal/util/cgo/logging
extern "C" void
goZapLogExt(
    int severity, const char* file, int line, const char* msg, int msg_len);
#endif
// Forwards a single glog record to the Go side through goZapLogExt.
//
// severity       glog severity, passed on as a plain int.
// full_filename  source file of the log call site, passed on unchanged.
// line           source line of the log call site.
// message        message bytes; message_len gives their count.
//
// The base filename and the broken-down timestamp supplied by glog are not
// used.
void
GoZapSink::send(google::LogSeverity severity,
                const char* full_filename,
                const char* /*base_filename*/,
                int line,
                const struct tm* /*tm_time*/,
                const char* message,
                size_t message_len) {
    int forwarded_len = static_cast<int>(message_len);

    // remove the '\n' added by glog
    const bool has_trailing_newline =
        forwarded_len > 0 && message[forwarded_len - 1] == '\n';
    if (has_trailing_newline) {
        --forwarded_len;
    }

    goZapLogExt(static_cast<int>(severity),
                full_filename,
                line,
                message,
                forwarded_len);
}

View File

@ -0,0 +1,29 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <glog/logging.h>
// GoZapSink is a glog LogSink implementation. Its send() override is defined
// in the accompanying .cpp file.
//
// NOTE(review): send() is declared without an access specifier, so it is
// private to GoZapSink; it is presumably only ever invoked by glog through
// the google::LogSink base interface — confirm before calling it directly.
class GoZapSink : public google::LogSink {
// Receives one log record from glog.
//   severity       glog severity of the record.
//   full_filename  full path of the source file that emitted the record.
//   base_filename  base name of that source file.
//   line           source line of the log call.
//   (const struct tm*)  broken-down timestamp of the record; left unnamed.
//   message        pointer to the message bytes.
//   message_len    number of bytes in message.
void
send(google::LogSeverity severity,
const char* full_filename,
const char* base_filename,
int line,
const struct tm*,
const char* message,
size_t message_len) override;
};

View File

@ -22,11 +22,14 @@
#include "log/Log.h"
#include "knowhere/comp/knowhere_config.h"
#include "knowhere/version.h"
#include "common/logging_c.h"
namespace milvus::config {
std::once_flag init_knowhere_once_;
static GoZapSink g_sink;
void
KnowhereInitImpl(const char* conf_file) {
auto init = [&]() {
@ -35,6 +38,13 @@ KnowhereInitImpl(const char* conf_file) {
knowhere::KnowhereConfig::ShowVersion();
if (!google::IsGoogleLoggingInitialized()) {
google::InitGoogleLogging("milvus");
google::AddLogSink(&g_sink);
// logs are caught by zap, so we don't need to log to stderr/stdout/files anymore.
FLAGS_logtostdout = false;
FLAGS_logtostderr = false;
FLAGS_alsologtostderr = false;
FLAGS_log_dir = "";
}
#ifdef EMBEDDED_MILVUS

View File

@ -31,6 +31,7 @@ import (
"path"
"unsafe"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/internal/util/initcore"
"github.com/milvus-io/milvus/internal/util/pathutil"
"github.com/milvus-io/milvus/pkg/v2/util/hardware"

View File

@ -32,6 +32,7 @@ import (
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/util/conc"
"github.com/milvus-io/milvus/pkg/v2/util/hardware"

View File

@ -55,6 +55,7 @@ import (
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/internal/util/analyzer"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/internal/util/dependency"
"github.com/milvus-io/milvus/internal/util/fileresource"
"github.com/milvus-io/milvus/internal/util/hookutil"

View File

@ -15,6 +15,7 @@ import (
"strconv"
"unsafe"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/pkg/v2/proto/indexpb"
)

View File

@ -32,6 +32,7 @@ import (
"go.uber.org/zap"
"google.golang.org/protobuf/proto"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/proto/clusteringpb"
)

View File

@ -12,6 +12,7 @@ import (
"unsafe"
"github.com/milvus-io/milvus/internal/util/analyzer/interfaces"
_ "github.com/milvus-io/milvus/internal/util/cgo"
)
var _ interfaces.Analyzer = (*CAnalyzer)(nil)

View File

@ -25,6 +25,7 @@ import (
"github.com/cockroachdb/errors"
_ "github.com/milvus-io/milvus/internal/util/cgo/logging"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
)

View File

@ -0,0 +1,84 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package logging
/*
extern void goZapLogExt(int severity,
char* file,
int line,
char* msg,
int msg_len);
*/
import "C"
import (
"time"
"go.uber.org/zap/zapcore"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/metrics"
)
const cgoLoggerName = "CGO"
// goZapLogExt is the cgo-exported entry point through which C/C++ code hands
// a single log record to the Go logger. The record is written through the
// zap core returned by log.L() under the logger name "CGO".
//
// sev is the glog severity (translated by mapGlogSeverity). file is a
// NUL-terminated C string and line an integer identifying the C/C++ call
// site; both are attached as the entry's caller. msg points to msgLen bytes
// of message text; the explicit length is used, so msg does not need to be
// NUL-terminated.
//
// Records whose level is disabled on the core are dropped before any C
// string is copied into Go memory.
//
//export goZapLogExt
func goZapLogExt(sev C.int,
	file *C.char,
	line C.int,
	msg *C.char,
	msgLen C.int,
) {
	// Resolve the level and the core exactly once so that the enabled check,
	// the entry itself and the final write all agree with each other.
	lv := mapGlogSeverity(int(sev))
	core := log.L().Core()
	if !core.Enabled(lv) {
		return
	}

	// A negative length is never valid for C.GoStringN; clamp it rather than
	// risk a Go panic inside a callback invoked from C.
	if msgLen < 0 {
		msgLen = 0
	}

	ent := zapcore.Entry{
		Level:      lv,
		Time:       time.Now(),
		LoggerName: cgoLoggerName,
		Message:    C.GoStringN(msg, msgLen),
		Caller: zapcore.EntryCaller{
			Defined: true,
			File:    C.GoString(file),
			Line:    int(line),
		},
	}
	if ce := core.Check(ent, nil); ce != nil {
		// Only the raw message length is counted, i.e. the size before encoding.
		metrics.LoggingCGOWriteTotal.Inc()
		metrics.LoggingCGOWriteBytes.Add(float64(msgLen))
		ce.Write()
	}
}
// mapGlogSeverity translates a glog severity number into the zap level used
// when the record is re-emitted on the Go side.
//
// glog severities are 0 (INFO), 1 (WARNING), 2 (ERROR) and 3 (FATAL). Any
// other value is treated as INFO.
func mapGlogSeverity(s int) zapcore.Level {
	if s == 1 { // GLOG_WARNING
		return zapcore.WarnLevel
	}
	// GLOG_ERROR and GLOG_FATAL both map to the error level:
	// glog fatal will call std::abort,
	// zap will call os.Exit(1),
	// we don't want to double exit, so we use error level instead
	if s == 2 || s == 3 {
		return zapcore.ErrorLevel
	}
	// GLOG_INFO (0) and every unrecognized severity.
	return zapcore.InfoLevel
}

View File

@ -0,0 +1,32 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package logging
import (
"testing"
"github.com/stretchr/testify/require"
"go.uber.org/zap/zapcore"
)
func TestLogging(t *testing.T) {
require.Equal(t, zapcore.InfoLevel, mapGlogSeverity(0))
require.Equal(t, zapcore.WarnLevel, mapGlogSeverity(1))
require.Equal(t, zapcore.ErrorLevel, mapGlogSeverity(2))
require.Equal(t, zapcore.ErrorLevel, mapGlogSeverity(3))
require.Equal(t, zapcore.InfoLevel, mapGlogSeverity(4))
}

View File

@ -10,6 +10,7 @@ import (
"sync/atomic"
"unsafe"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/pkg/v2/util/typeutil"
)

View File

@ -23,6 +23,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/internal/storage"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/internal/util/segcore"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/proto/cgopb"

View File

@ -18,6 +18,7 @@ import (
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/internal/util/vecindexmgr"
"github.com/milvus-io/milvus/pkg/v2/common"
"github.com/milvus-io/milvus/pkg/v2/proto/indexcgopb"

View File

@ -41,6 +41,7 @@ import (
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/internal/util/hookutil"
"github.com/milvus-io/milvus/internal/util/pathutil"
"github.com/milvus-io/milvus/pkg/v2/log"

View File

@ -43,6 +43,7 @@ import (
"golang.org/x/exp/maps"
"google.golang.org/protobuf/proto"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/pkg/v2/log"
)

View File

@ -1,5 +1,9 @@
package segcore
import (
_ "github.com/milvus-io/milvus/internal/util/cgo"
)
/*
#cgo pkg-config: milvus_core

View File

@ -11,6 +11,7 @@ import (
"fmt"
"unsafe"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/pkg/v2/log"
"github.com/milvus-io/milvus/pkg/v2/util/merr"
)

View File

@ -31,6 +31,7 @@ import (
"unsafe"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
_ "github.com/milvus-io/milvus/internal/util/cgo"
"github.com/milvus-io/milvus/pkg/v2/log"
)

View File

@ -130,10 +130,9 @@ func (s *asyncTextIOCore) Write(ent zapcore.Entry, fields []zapcore.Field) error
}
select {
case s.pending <- entry:
metrics.LoggingPendingWriteLength.Inc()
metrics.LoggingPendingWriteBytes.Add(float64(length))
metrics.LoggingPendingWriteTotal.Inc()
case <-writeDroppedTimeout:
metrics.LoggingDroppedWrites.Inc()
metrics.LoggingDroppedWriteTotal.Inc()
// drop the entry if the write is dropped due to timeout
buf.Free()
}
@ -165,15 +164,20 @@ func (s *asyncTextIOCore) background() {
// consumeEntry write the entry to the underlying buffered write syncer and free the buffer.
func (s *asyncTextIOCore) consumeEntry(ent *entryItem) {
length := ent.buf.Len()
metrics.LoggingPendingWriteLength.Dec()
metrics.LoggingPendingWriteBytes.Sub(float64(length))
metrics.LoggingPendingWriteTotal.Dec()
writes := s.getWriteBytes(ent)
if _, err := s.bws.Write(writes); err != nil {
metrics.LoggingIOFailure.Inc()
metrics.LoggingIOFailureTotal.Inc()
} else {
metrics.LoggingWriteTotal.Inc()
metrics.LoggingWriteBytes.Add(float64(length))
}
ent.buf.Free()
if ent.level > zapcore.ErrorLevel {
s.bws.Sync()
if err := s.bws.Sync(); err != nil {
metrics.LoggingIOFailureTotal.Inc()
}
}
}
@ -186,7 +190,7 @@ func (s *asyncTextIOCore) getWriteBytes(ent *entryItem) []byte {
if length > s.maxBytesPerLog {
// truncate the write if it exceeds the max bytes per log
metrics.LoggingTruncatedWrites.Inc()
metrics.LoggingTruncatedWriteTotal.Inc()
metrics.LoggingTruncatedWriteBytes.Add(float64(length - s.maxBytesPerLog))
end := writes[length-1]
@ -211,7 +215,7 @@ func (s *asyncTextIOCore) flushPendingWriteWithTimeout() {
func (s *asyncTextIOCore) flushAllPendingWrites(done chan struct{}) {
defer func() {
if err := s.bws.Stop(); err != nil {
metrics.LoggingIOFailure.Inc()
metrics.LoggingIOFailureTotal.Inc()
}
close(done)
}()

View File

@ -29,57 +29,81 @@ const (
var (
LoggingMetricsRegisterOnce sync.Once
LoggingPendingWriteLength = prometheus.NewGauge(prometheus.GaugeOpts{
LoggingPendingWriteTotal = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: milvusNamespace,
Subsystem: loggingMetricSubsystem,
Name: "pending_write_length",
Name: "pending_write_total",
Help: "The length of pending writes in the logging buffer",
})
LoggingPendingWriteBytes = prometheus.NewGauge(prometheus.GaugeOpts{
LoggingTruncatedWriteTotal = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: milvusNamespace,
Subsystem: loggingMetricSubsystem,
Name: "pending_write_bytes",
Help: "The total bytes of pending writes in the logging buffer",
})
LoggingTruncatedWrites = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: milvusNamespace,
Subsystem: loggingMetricSubsystem,
Name: "truncated_writes",
Name: "truncated_write_total",
Help: "The number of truncated writes due to exceeding the max bytes per log",
})
LoggingTruncatedWriteBytes = prometheus.NewGauge(prometheus.GaugeOpts{
LoggingTruncatedWriteBytes = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: milvusNamespace,
Subsystem: loggingMetricSubsystem,
Name: "truncated_write_bytes",
Help: "The total bytes of truncated writes due to exceeding the max bytes per log",
})
LoggingDroppedWrites = prometheus.NewGauge(prometheus.GaugeOpts{
LoggingDroppedWriteTotal = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: milvusNamespace,
Subsystem: loggingMetricSubsystem,
Name: "dropped_writes",
Name: "dropped_write_total",
Help: "The number of dropped writes due to buffer full or write timeout",
})
LoggingIOFailure = prometheus.NewGauge(prometheus.GaugeOpts{
LoggingIOFailureTotal = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: milvusNamespace,
Subsystem: loggingMetricSubsystem,
Name: "io_failures",
Name: "io_failure_total",
Help: "The number of IO failures due to underlying write syncer is blocked or write timeout",
})
LoggingWriteTotal = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: milvusNamespace,
Subsystem: loggingMetricSubsystem,
Name: "write_total",
Help: "The total number of writes",
})
LoggingWriteBytes = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: milvusNamespace,
Subsystem: loggingMetricSubsystem,
Name: "write_bytes",
Help: "The total bytes of written logs",
})
LoggingCGOWriteTotal = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: milvusNamespace,
Subsystem: loggingMetricSubsystem,
Name: "cgo_write_total",
Help: "The total number of CGO writes",
})
LoggingCGOWriteBytes = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: milvusNamespace,
Subsystem: loggingMetricSubsystem,
Name: "cgo_write_bytes",
Help: "The total bytes of CGO write logs, the bytes is calculated before encoding, only considers the length of the message, so the actual bytes may be greater than the value",
})
)
// RegisterLoggingMetrics registers logging metrics
func RegisterLoggingMetrics(registry *prometheus.Registry) {
LoggingMetricsRegisterOnce.Do(func() {
registry.MustRegister(LoggingPendingWriteLength)
registry.MustRegister(LoggingPendingWriteBytes)
registry.MustRegister(LoggingTruncatedWrites)
registry.MustRegister(LoggingPendingWriteTotal)
registry.MustRegister(LoggingTruncatedWriteTotal)
registry.MustRegister(LoggingTruncatedWriteBytes)
registry.MustRegister(LoggingDroppedWrites)
registry.MustRegister(LoggingIOFailure)
registry.MustRegister(LoggingDroppedWriteTotal)
registry.MustRegister(LoggingIOFailureTotal)
registry.MustRegister(LoggingWriteTotal)
registry.MustRegister(LoggingWriteBytes)
registry.MustRegister(LoggingCGOWriteTotal)
registry.MustRegister(LoggingCGOWriteBytes)
})
}