diff --git a/internal/cmd/exec.go b/internal/cmd/exec.go index 7ed15cd..1d7d6ef 100644 --- a/internal/cmd/exec.go +++ b/internal/cmd/exec.go @@ -49,6 +49,39 @@ func (exec Executor) listPGLogFiles(numLogs int) (string, string, error) { return stdout.String(), stderr.String(), err } +// listPGConfFiles returns the full path of Postgres conf files. +// These are the *.conf stored on the Postgres instance +func (exec Executor) listPGConfFiles() (string, string, error) { + var stdout, stderr bytes.Buffer + + command := "ls -1dt pgdata/pg[0-9][0-9]/*.conf" + err := exec(nil, &stdout, &stderr, "bash", "-ceu", "--", command) + + return stdout.String(), stderr.String(), err +} + +// listBackrestLogFiles returns the full path of pgBackRest log files. +// These are the pgBackRest logs stored on the Postgres instance +func (exec Executor) listBackrestLogFiles() (string, string, error) { + var stdout, stderr bytes.Buffer + + command := "ls -1dt pgdata/pgbackrest/log/*" + err := exec(nil, &stdout, &stderr, "bash", "-ceu", "--", command) + + return stdout.String(), stderr.String(), err +} + +// listBackrestRepoHostLogFiles returns the full path of pgBackRest log files. +// These are the pgBackRest logs stored on the repo host +func (exec Executor) listBackrestRepoHostLogFiles() (string, string, error) { + var stdout, stderr bytes.Buffer + + command := "ls -1dt pgbackrest/*/log/*" + err := exec(nil, &stdout, &stderr, "bash", "-ceu", "--", command) + + return stdout.String(), stderr.String(), err +} + // catFile takes the full path of a file and returns the contents // of that file func (exec Executor) catFile(filePath string) (string, string, error) { diff --git a/internal/cmd/export.go b/internal/cmd/export.go index 7113ac8..b27b6f4 100644 --- a/internal/cmd/export.go +++ b/internal/cmd/export.go @@ -446,12 +446,23 @@ Collecting PGO CLI logs... 
} // Logs + // All Postgres Logs on the Postgres Instances (primary and replicas) if numLogs > 0 { if err == nil { - err = gatherPostgresqlLogs(ctx, clientset, restConfig, namespace, clusterName, numLogs, tw, cmd) + err = gatherPostgresLogsAndConfigs(ctx, clientset, restConfig, namespace, clusterName, numLogs, tw, cmd) } } + // All pgBackRest Logs on the Postgres Instances + if err == nil { + err = gatherDbBackrestLogs(ctx, clientset, restConfig, namespace, clusterName, tw, cmd) + } + + // All pgBackRest Logs on the Repo Host + if err == nil { + err = gatherRepoHostLogs(ctx, clientset, restConfig, namespace, clusterName, tw, cmd) + } + // get PostgresCluster Pod logs if err == nil { writeInfo(cmd, "Collecting PostgresCluster pod logs...") @@ -516,7 +527,7 @@ Collecting PGO CLI logs... // Print cli output writeInfo(cmd, "Collecting PGO CLI logs...") - path := clusterName + "/logs/cli" + path := clusterName + "/cli.log" if logErr := writeTar(tw, cliOutput.Bytes(), path, cmd); logErr != nil { return logErr } @@ -937,8 +948,9 @@ func gatherEvents(ctx context.Context, return nil } -// gatherLogs takes a client and buffer to write logs to a buffer -func gatherPostgresqlLogs(ctx context.Context, +// gatherPostgresLogsAndConfigs takes a client and writes logs and configs +// from primary and replicas to a buffer +func gatherPostgresLogsAndConfigs(ctx context.Context, clientset *kubernetes.Clientset, config *rest.Config, namespace string, @@ -948,11 +960,11 @@ func gatherPostgresqlLogs(ctx context.Context, cmd *cobra.Command, ) error { writeInfo(cmd, "Collecting Postgres logs...") - // Get the primary instance Pod by its labels - pods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ - // TODO(jmckulk): should we be getting replica logs? 
- LabelSelector: util.PrimaryInstanceLabels(clusterName), + + dbPods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: util.DBInstanceLabels(clusterName), }) + if err != nil { if apierrors.IsForbidden(err) { writeInfo(cmd, err.Error()) @@ -960,23 +972,161 @@ func gatherPostgresqlLogs(ctx context.Context, } return err } - if len(pods.Items) != 1 { - writeInfo(cmd, "No primary instance pod found for gathering logs") + + if len(dbPods.Items) == 0 { + writeInfo(cmd, "No database instance pod found for gathering logs and config") return nil } + writeDebug(cmd, fmt.Sprintf("Found %d Pods\n", len(dbPods.Items))) + podExec, err := util.NewPodExecutor(config) if err != nil { return err } - exec := func(stdin io.Reader, stdout, stderr io.Writer, command ...string, - ) error { - return podExec(namespace, pods.Items[0].GetName(), util.ContainerDatabase, - stdin, stdout, stderr, command...) + for _, pod := range dbPods.Items { + writeDebug(cmd, fmt.Sprintf("Pod Name is %s\n", pod.Name)) + + exec := func(stdin io.Reader, stdout, stderr io.Writer, command ...string, + ) error { + return podExec(namespace, pod.Name, util.ContainerDatabase, + stdin, stdout, stderr, command...) + } + + // Get Postgres Log Files + stdout, stderr, err := Executor(exec).listPGLogFiles(numLogs) + + // Depending upon the list* function above: + // An error may happen when err is non-nil or stderr is non-empty. + // In both cases, we want to print helpful information and continue to the + // next iteration. + if err != nil || stderr != "" { + + if apierrors.IsForbidden(err) { + writeInfo(cmd, err.Error()) + return nil + } + + writeDebug(cmd, "Error getting PG logs\n") + + if err != nil { + writeDebug(cmd, fmt.Sprintf("%s\n", err.Error())) + } + if stderr != "" { + writeDebug(cmd, stderr) + } + + if strings.Contains(stderr, "No such file or directory") { + writeDebug(cmd, "Cannot find any Postgres log files. 
This is acceptable in some configurations.\n") + } + continue + } + + logFiles := strings.Split(strings.TrimSpace(stdout), "\n") + for _, logFile := range logFiles { + writeDebug(cmd, fmt.Sprintf("LOG FILE: %s\n", logFile)) + var buf bytes.Buffer + + stdout, stderr, err := Executor(exec).catFile(logFile) + if err != nil { + if apierrors.IsForbidden(err) { + writeInfo(cmd, err.Error()) + // Continue and output errors for each log file + // Allow the user to see and address all issues at once + continue + } + return err + } + + buf.Write([]byte(stdout)) + if stderr != "" { + str := fmt.Sprintf("\nError returned: %s\n", stderr) + buf.Write([]byte(str)) + } + + path := clusterName + fmt.Sprintf("/pods/%s/", pod.Name) + logFile + if err := writeTar(tw, buf.Bytes(), path, cmd); err != nil { + return err + } + } + + // Get Postgres Conf Files + stdout, stderr, err = Executor(exec).listPGConfFiles() + + // Depending upon the list* function above: + // An error may happen when err is non-nil or stderr is non-empty. + // In both cases, we want to print helpful information and continue to the + // next iteration. + if err != nil || stderr != "" { + + if apierrors.IsForbidden(err) { + writeInfo(cmd, err.Error()) + return nil + } + + writeDebug(cmd, "Error getting PG Conf files\n") + + if err != nil { + writeDebug(cmd, fmt.Sprintf("%s\n", err.Error())) + } + if stderr != "" { + writeDebug(cmd, stderr) + } + + if strings.Contains(stderr, "No such file or directory") { + writeDebug(cmd, "Cannot find any PG Conf files. 
This is acceptable in some configurations.\n") + } + continue + } + + logFiles = strings.Split(strings.TrimSpace(stdout), "\n") + for _, logFile := range logFiles { + var buf bytes.Buffer + + stdout, stderr, err := Executor(exec).catFile(logFile) + if err != nil { + if apierrors.IsForbidden(err) { + writeInfo(cmd, err.Error()) + // Continue and output errors for each log file + // Allow the user to see and address all issues at once + continue + } + return err + } + + buf.Write([]byte(stdout)) + if stderr != "" { + str := fmt.Sprintf("\nError returned: %s\n", stderr) + buf.Write([]byte(str)) + } + + path := clusterName + fmt.Sprintf("/pods/%s/", pod.Name) + logFile + if err := writeTar(tw, buf.Bytes(), path, cmd); err != nil { + return err + } + } + } + return nil +} + +// gatherDbBackrestLogs gathers all the file-based pgBackRest logs on the DB instance. +// There may not be any logs depending upon pgBackRest's log-level-file. +func gatherDbBackrestLogs(ctx context.Context, + clientset *kubernetes.Clientset, + config *rest.Config, + namespace string, + clusterName string, + tw *tar.Writer, + cmd *cobra.Command, +) error { + writeInfo(cmd, "Collecting pgBackRest logs...") + + dbPods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: util.DBInstanceLabels(clusterName), + }) - stdout, stderr, err := Executor(exec).listPGLogFiles(numLogs) if err != nil { if apierrors.IsForbidden(err) { writeInfo(cmd, err.Error()) @@ -984,37 +1134,191 @@ func gatherPostgresqlLogs(ctx context.Context, } return err } - if stderr != "" { - writeInfo(cmd, stderr) + + if len(dbPods.Items) == 0 { + writeInfo(cmd, "No database instance pod found for gathering logs") + return nil } - logFiles := strings.Split(strings.TrimSpace(stdout), "\n") - for _, logFile := range logFiles { - var buf bytes.Buffer + writeDebug(cmd, fmt.Sprintf("Found %d Pods\n", len(dbPods.Items))) + + podExec, err := util.NewPodExecutor(config) + if err != nil { + return err + } + + 
for _, pod := range dbPods.Items { + writeDebug(cmd, fmt.Sprintf("Pod Name is %s\n", pod.Name)) + + exec := func(stdin io.Reader, stdout, stderr io.Writer, command ...string, + ) error { + return podExec(namespace, pod.Name, util.ContainerDatabase, + stdin, stdout, stderr, command...) + } + + // Get pgBackRest Log Files + stdout, stderr, err := Executor(exec).listBackrestLogFiles() + + // Depending upon the list* function above: + // An error may happen when err is non-nil or stderr is non-empty. + // In both cases, we want to print helpful information and continue to the + // next iteration. + if err != nil || stderr != "" { - stdout, stderr, err := Executor(exec).catFile(logFile) - if err != nil { if apierrors.IsForbidden(err) { writeInfo(cmd, err.Error()) - // Continue and output errors for each log file - // Allow the user to see and address all issues at once - continue + return nil } - return err + + writeDebug(cmd, "Error getting pgBackRest logs\n") + + if err != nil { + writeDebug(cmd, fmt.Sprintf("%s\n", err.Error())) + } + if stderr != "" { + writeDebug(cmd, stderr) + } + + if strings.Contains(stderr, "No such file or directory") { + writeDebug(cmd, "Cannot find any pgBackRest log files. 
This is acceptable in some configurations.\n") + } + continue } - buf.Write([]byte(stdout)) - if stderr != "" { - str := fmt.Sprintf("\nError returned: %s\n", stderr) - buf.Write([]byte(str)) + logFiles := strings.Split(strings.TrimSpace(stdout), "\n") + for _, logFile := range logFiles { + writeDebug(cmd, fmt.Sprintf("LOG FILE: %s\n", logFile)) + var buf bytes.Buffer + + stdout, stderr, err := Executor(exec).catFile(logFile) + if err != nil { + if apierrors.IsForbidden(err) { + writeInfo(cmd, err.Error()) + // Continue and output errors for each log file + // Allow the user to see and address all issues at once + continue + } + return err + } + + buf.Write([]byte(stdout)) + if stderr != "" { + str := fmt.Sprintf("\nError returned: %s\n", stderr) + buf.Write([]byte(str)) + } + + path := clusterName + fmt.Sprintf("/pods/%s/", pod.Name) + logFile + if err := writeTar(tw, buf.Bytes(), path, cmd); err != nil { + return err + } } - path := clusterName + "/logs/postgresql/" + logFile - if err := writeTar(tw, buf.Bytes(), path, cmd); err != nil { - return err + } + return nil +} + +// gatherRepoHostLogs gathers all the file-based pgBackRest logs on the repo host. +// There may not be any logs depending upon pgBackRest's log-level-file. 
+func gatherRepoHostLogs(ctx context.Context, + clientset *kubernetes.Clientset, + config *rest.Config, + namespace string, + clusterName string, + tw *tar.Writer, + cmd *cobra.Command, +) error { + writeInfo(cmd, "Collecting pgBackRest Repo Host logs...") + + repoHostPods, err := clientset.CoreV1().Pods(namespace).List(ctx, metav1.ListOptions{ + LabelSelector: util.RepoHostInstanceLabels(clusterName), + }) + + if err != nil { + if apierrors.IsForbidden(err) { + writeInfo(cmd, err.Error()) + return nil } + return err + } + + if len(repoHostPods.Items) == 0 { + writeInfo(cmd, "No Repo Host pod found for gathering logs") + } + + writeDebug(cmd, fmt.Sprintf("Found %d Repo Host Pod\n", len(repoHostPods.Items))) + + podExec, err := util.NewPodExecutor(config) + if err != nil { + return err } + for _, pod := range repoHostPods.Items { + writeDebug(cmd, fmt.Sprintf("Pod Name is %s\n", pod.Name)) + + exec := func(stdin io.Reader, stdout, stderr io.Writer, command ...string, + ) error { + return podExec(namespace, pod.Name, util.ContainerPGBackrest, + stdin, stdout, stderr, command...) + } + + // Get BackRest Repo Host Log Files + stdout, stderr, err := Executor(exec).listBackrestRepoHostLogFiles() + + // Depending upon the list* function above: + // An error may happen when err is non-nil or stderr is non-empty. + // In both cases, we want to print helpful information and continue to the + // next iteration. + if err != nil || stderr != "" { + + if apierrors.IsForbidden(err) { + writeInfo(cmd, err.Error()) + return nil + } + + writeDebug(cmd, "Error getting pgBackRest logs\n") + + if err != nil { + writeDebug(cmd, fmt.Sprintf("%s\n", err.Error())) + } + if stderr != "" { + writeDebug(cmd, stderr) + } + + if strings.Contains(stderr, "No such file or directory") { + writeDebug(cmd, "Cannot find any pgBackRest log files. 
This is acceptable in some configurations.\n") + } + continue + } + + logFiles := strings.Split(strings.TrimSpace(stdout), "\n") + for _, logFile := range logFiles { + writeDebug(cmd, fmt.Sprintf("LOG FILE: %s\n", logFile)) + var buf bytes.Buffer + + stdout, stderr, err := Executor(exec).catFile(logFile) + if err != nil { + if apierrors.IsForbidden(err) { + writeInfo(cmd, err.Error()) + // Continue and output errors for each log file + // Allow the user to see and address all issues at once + continue + } + return err + } + + buf.Write([]byte(stdout)) + if stderr != "" { + str := fmt.Sprintf("\nError returned: %s\n", stderr) + buf.Write([]byte(str)) + } + + path := clusterName + fmt.Sprintf("/pods/%s/", pod.Name) + logFile + if err := writeTar(tw, buf.Bytes(), path, cmd); err != nil { + return err + } + } + + } return nil } @@ -1070,8 +1374,8 @@ func gatherPodLogs(ctx context.Context, return err } - path := rootDir + "/logs/" + - pod.GetName() + "_" + container.Name + ".log" + path := rootDir + "/pods/" + + pod.GetName() + "/containers/" + container.Name + ".log" if err := writeTar(tw, b, path, cmd); err != nil { return err } diff --git a/internal/cmd/show.go b/internal/cmd/show.go index 3536004..f8d015e 100644 --- a/internal/cmd/show.go +++ b/internal/cmd/show.go @@ -95,7 +95,7 @@ HA if stdout, stderr, err := getBackup(config, args, "text", ""); err != nil { return err } else { - cmd.Printf(stdout) + cmd.Printf("%s", stdout) if stderr != "" { cmd.Printf("\nError returned: %s\n", stderr) } @@ -106,7 +106,7 @@ HA if stdout, stderr, err := getHA(config, args, "pretty"); err != nil { return err } else { - cmd.Printf(stdout) + cmd.Printf("%s", stdout) if stderr != "" { cmd.Printf("\nError returned: %s\n", stderr) } @@ -185,7 +185,7 @@ stanza: db stdout, stderr, err := getBackup(config, args, outputEnum.String(), repoNum) if err == nil { - cmd.Printf(stdout) + cmd.Printf("%s", stdout) if stderr != "" { cmd.Printf("\nError returned: %s\n", stderr) } @@ -257,7 +257,7 @@ 
pgo show ha hippo --output json stdout, stderr, err := getHA(config, args, outputEnum.String()) if err == nil { - cmd.Printf(stdout) + cmd.Printf("%s", stdout) if stderr != "" { cmd.Printf("\nError returned: %s\n", stderr) } diff --git a/internal/cmd/stop.go b/internal/cmd/stop.go index 0e3226a..9b38f59 100644 --- a/internal/cmd/stop.go +++ b/internal/cmd/stop.go @@ -91,7 +91,7 @@ postgresclusters/hippo stop initiated`) msg, err := patchClusterShutdown(cluster, client, requestArgs) if msg != "" { - cmd.Printf(msg) + cmd.Printf("%s", msg) } if err != nil { return err diff --git a/internal/util/naming.go b/internal/util/naming.go index 82c3ff9..3d5f538 100644 --- a/internal/util/naming.go +++ b/internal/util/naming.go @@ -34,6 +34,9 @@ const ( // LabelOperator is used to identify operator Pods LabelOperator = "postgres-operator.crunchydata.com/control-plane" + + // LabelPGBackRestDedicated is used to identify the Repo Host pod + LabelPGBackRestDedicated = labelPrefix + "pgbackrest-dedicated" ) const ( @@ -41,6 +44,9 @@ const ( // DataPostgres is a LabelData value that indicates the object has PostgreSQL data. DataPostgres = "postgres" + + // DataBackrest is a LabelData value that indicates the object is a Repo Host. + DataBackrest = "pgbackrest" ) const ( @@ -50,6 +56,10 @@ const ( // currently the leader. RolePatroniLeader = "master" + // RolePatroniReplica is the LabelRole that Patroni sets on the Pod that is + // currently a replica. + RolePatroniReplica = "replica" + // RolePostgresUser is the LabelRole applied to PostgreSQL user secrets. RolePostgresUser = "pguser" ) @@ -60,8 +70,16 @@ const ( // ContainerDatabase is the name of the container running PostgreSQL and // supporting tools: Patroni, pgBackRest, etc. 
ContainerDatabase = "database" + + ContainerPGBackrest = "pgbackrest" ) +// DBInstanceLabels provides labels for a PostgreSQL cluster primary or replica instance +func DBInstanceLabels(clusterName string) string { + return LabelCluster + "=" + clusterName + "," + + LabelData + "=" + DataPostgres +} + // PrimaryInstanceLabels provides labels for a PostgreSQL cluster primary instance func PrimaryInstanceLabels(clusterName string) string { return LabelCluster + "=" + clusterName + "," + @@ -69,6 +87,12 @@ func PrimaryInstanceLabels(clusterName string) string { LabelRole + "=" + RolePatroniLeader } +// RepoHostInstanceLabels provides labels for pgBackRest Repo Host instances +func RepoHostInstanceLabels(clusterName string) string { + return LabelCluster + "=" + clusterName + "," + + LabelPGBackRestDedicated + "=" +} + // PostgresUserSecretLabels provides labels for the Postgres user Secret func PostgresUserSecretLabels(clusterName string) string { return LabelCluster + "=" + clusterName + "," + diff --git a/testing/kuttl/e2e/support-export/01--support_export.yaml b/testing/kuttl/e2e/support-export/01--support_export.yaml index 3d1b0b4..d17dc89 100644 --- a/testing/kuttl/e2e/support-export/01--support_export.yaml +++ b/testing/kuttl/e2e/support-export/01--support_export.yaml @@ -128,7 +128,7 @@ commands: fi # check that the PGO CLI log file contains expected messages - CLI_LOG="./kuttl-support-cluster/logs/cli" + CLI_LOG="./kuttl-support-cluster/cli.log" # info output includes expected heading if ! 
grep -Fq -- "- INFO - | PGO CLI Support Export Tool" $CLI_LOG diff --git a/testing/kuttl/e2e/support-export/31--support_export.yaml b/testing/kuttl/e2e/support-export/31--support_export.yaml index 89deff0..1b71587 100644 --- a/testing/kuttl/e2e/support-export/31--support_export.yaml +++ b/testing/kuttl/e2e/support-export/31--support_export.yaml @@ -6,8 +6,8 @@ commands: - script: tar -xzf ./crunchy_k8s_support_export_*.tar.gz - script: | CLEANUP="rm -r ./kuttl-support-monitoring-cluster ./monitoring ./crunchy_k8s_support_export_*.tar.gz" - CLUSTER_DIR="./kuttl-support-monitoring-cluster/logs/" - MONITORING_DIR="./monitoring/logs/" + CLUSTER_DIR="./kuttl-support-monitoring-cluster/pods/" + MONITORING_DIR="./monitoring/pods/" # check for exporter, prometheus, grafana and alertmanager logs found=$(find ${CLUSTER_DIR} -name "*exporter.log" | wc -l)