Skip to content

Commit 168e6e1

Browse files
committed
Implement reindex command
Ref: #22
1 parent 40c3ccf commit 168e6e1

File tree

9 files changed

+371
-10
lines changed

9 files changed

+371
-10
lines changed

README.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,14 @@ The chart is deleted from the repo:
110110
## Uninstall
111111

112112
$ helm plugin remove s3
113-
113+
114+
## Documentation
115+
116+
Additional documentation is available in the [docs](docs) directory. This currently includes:
117+
- estimated [usage cost calculation](docs/usage-cost.md)
118+
- [best practices](docs/best-practice.md)
119+
for organizing your repositories.
120+
114121
## Contributing
115122

116123
Contributions are welcome. Please see [these instructions](.github/CONTRIBUTING.md)

cmd/helms3/delete.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ func runDelete(name, version, repoName string) error {
5959
if err := storage.Delete(ctx, uri); err != nil {
6060
return errors.WithMessage(err, "delete chart file from s3")
6161
}
62-
if _, err := storage.Upload(ctx, repoEntry.URL+"/index.yaml", idxReader); err != nil {
62+
if err := storage.PutIndex(ctx, repoEntry.URL, idxReader); err != nil {
6363
return errors.WithMessage(err, "upload new index to s3")
6464
}
6565

cmd/helms3/main.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ const (
1717
actionVersion = "version"
1818
actionInit = "init"
1919
actionPush = "push"
20+
actionReindex = "reindex"
2021
actionDelete = "delete"
2122

2223
defaultTimeout = time.Second * 5
@@ -46,6 +47,11 @@ func main() {
4647
Required().
4748
String()
4849

50+
reindexCmd := cli.Command(actionReindex, "Reindex the repository.")
51+
reindexTargetRepository := reindexCmd.Arg("repo", "Target repository to reindex").
52+
Required().
53+
String()
54+
4955
deleteCmd := cli.Command(actionDelete, "Delete chart from the repository.").Alias("del")
5056
deleteChartName := deleteCmd.Arg("chartName", "Name of chart to delete").
5157
Required().
@@ -81,6 +87,11 @@ func main() {
8187
}
8288
return
8389

90+
case actionReindex:
91+
if err := runReindex(*reindexTargetRepository); err != nil {
92+
log.Fatal(err)
93+
}
94+
8495
case actionDelete:
8596
if err := runDelete(*deleteChartName, *deleteChartVersion, *deleteTargetRepository); err != nil {
8697
log.Fatal(err)

cmd/helms3/proxy.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package main
33
import (
44
"context"
55
"fmt"
6+
"path"
7+
"strings"
68

79
"github.com/pkg/errors"
810

@@ -18,6 +20,9 @@ func runProxy(uri string) error {
1820

1921
b, err := storage.FetchRaw(ctx, uri)
2022
if err != nil {
23+
if strings.HasSuffix(uri, "index.yaml") && err == awss3.ErrObjectNotFound {
24+
return fmt.Errorf("The index file does not exist by the path %s. If you haven't initialized the repository yet, try running \"helm s3 init %s\"", uri, path.Dir(uri))
25+
}
2126
return errors.WithMessage(err, "fetch from s3")
2227
}
2328

cmd/helms3/push.go

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package main
22

33
import (
44
"context"
5+
"encoding/json"
56
"fmt"
67
"os"
78
"path/filepath"
@@ -52,25 +53,29 @@ func runPush(chartPath string, repoName string) error {
5253
return err
5354
}
5455

56+
hash, err := provenance.DigestFile(fname)
57+
if err != nil {
58+
return errors.WithMessage(err, "get chart digest")
59+
}
60+
5561
fchart, err := os.Open(fname)
5662
if err != nil {
5763
return errors.Wrap(err, "open chart file")
5864
}
5965

60-
if _, err := storage.Upload(ctx, repoEntry.URL+"/"+fname, fchart); err != nil {
66+
serializedChartMeta, err := json.Marshal(chart.Metadata)
67+
if err != nil {
68+
return errors.Wrap(err, "encode chart metadata to json")
69+
}
70+
71+
if _, err := storage.PutChart(ctx, repoEntry.URL+"/"+fname, fchart, string(serializedChartMeta), hash); err != nil {
6172
return errors.WithMessage(err, "upload chart to s3")
6273
}
6374

64-
// Next, update the repository index.
6575
// The gap between index fetching and uploading should be as small as
6676
// possible to make the best effort to avoid race conditions.
6777
// See https://github.com/hypnoglow/helm-s3/issues/18 for more info.
6878

69-
hash, err := provenance.DigestFile(fname)
70-
if err != nil {
71-
return errors.WithMessage(err, "get chart digest")
72-
}
73-
7479
// Fetch current index, update it and upload it back.
7580

7681
b, err := storage.FetchRaw(ctx, repoEntry.URL+"/index.yaml")
@@ -91,7 +96,7 @@ func runPush(chartPath string, repoName string) error {
9196
return errors.WithMessage(err, "get index reader")
9297
}
9398

94-
if _, err := storage.Upload(ctx, repoEntry.URL+"/index.yaml", idxReader); err != nil {
99+
if err := storage.PutIndex(ctx, repoEntry.URL, idxReader); err != nil {
95100
return errors.WithMessage(err, "upload index to s3")
96101
}
97102

cmd/helms3/reindex.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"time"
6+
7+
"github.com/pkg/errors"
8+
9+
"github.com/hypnoglow/helm-s3/pkg/awss3"
10+
"github.com/hypnoglow/helm-s3/pkg/awsutil"
11+
"github.com/hypnoglow/helm-s3/pkg/helmutil"
12+
"github.com/hypnoglow/helm-s3/pkg/index"
13+
)
14+
15+
const (
16+
// reindexCommandDefaultTimeput is the duration runReindex passes to
// context.WithTimeout for the whole reindex operation.
// NOTE(review): "Timeput" looks like a typo for "Timeout"; the name is
// left as-is here because it is referenced by runReindex.
reindexCommandDefaultTimeput = time.Second * 15
17+
)
18+
19+
func runReindex(repoName string) error {
20+
// Just one big timeout for the whole operation.
21+
ctx, cancel := context.WithTimeout(context.Background(), reindexCommandDefaultTimeput)
22+
defer cancel()
23+
24+
ctx = ctx
25+
26+
repoEntry, err := helmutil.LookupRepoEntry(repoName)
27+
if err != nil {
28+
return err
29+
}
30+
31+
awsConfig, err := awsutil.Config()
32+
if err != nil {
33+
return errors.Wrap(err, "get aws config")
34+
}
35+
36+
storage := awss3.NewStorage(awsConfig)
37+
38+
items := make(chan awss3.ChartInfo, 1)
39+
errs := make(chan error, 1)
40+
41+
go storage.Traverse(context.TODO(), repoEntry.URL, items, errs)
42+
43+
builtIndex := make(chan *index.Index, 1)
44+
go func() {
45+
idx := index.New()
46+
for item := range items {
47+
idx.Add(item.Meta, item.Filename, repoEntry.URL, item.Hash)
48+
}
49+
idx.SortEntries()
50+
51+
builtIndex <- idx
52+
}()
53+
54+
for err := range errs {
55+
return errors.Wrap(err, "traverse the chart repository")
56+
}
57+
58+
idx := <-builtIndex
59+
60+
r, err := idx.Reader()
61+
if err != nil {
62+
return errors.Wrap(err, "get index reader")
63+
}
64+
65+
if err := storage.PutIndex(context.TODO(), repoEntry.URL, r); err != nil {
66+
return errors.Wrap(err, "upload index to the repository")
67+
}
68+
69+
return nil
70+
}

docs/best-practice.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Best Practice
2+
3+
## Reindexing your repository
4+
5+
In short, due to limitations of AWS your chart repository index can be broken
6+
by accident. This means that it may not reflect the "real" state of your chart
7+
files in S3 bucket. Nothing serious, but can be annoying.
8+
9+
To work around this, the `helm s3 reindex <repo>` command is available. *Note: this
10+
operation is [much more expensive](usage-cost.md#reindex) than others in this plugin*.
11+
12+
## Organizing your repositories
13+
14+
A chart repository file structure is always flat.
15+
It cannot contain nested directories.
16+
17+
The number of AWS S3 requests for reindex operation depends on your repository structure.
18+
Due to limitations of AWS S3 API you cannot list objects of the folder under the key
19+
excluding subfolders. `ListObjects` can only list objects under the key recursively.
20+
21+
The plugin code does its best to ignore subfolders, because a chart repository is always flat.
22+
But still, not all cases are covered.
23+
24+
Imagine the worst case scenario: you have 100 chart files in your repository, which is the
25+
bucket root. And 1 million files in the "foo-bar" subfolder, which are not related to
26+
the chart repository. In this case the plugin **has to** call `ListObjects`
27+
about 1000 times (1000 objects per call) to make sure it did not miss any chart file.
28+
29+
By that, the golden rule is to **never have subfolders in your chart repository folder**.
30+
31+
So, there are two good options for your chart repository file structure inside S3 bucket:
32+
33+
1. One bucket - one repository. Create a bucket "yourcompany-charts-stable", or
34+
"yourcompany-productname-charts" and use the bucket root as your chart repository.
35+
In this case, never put any other files in that bucket.
36+
37+
2. One bucket - many repositories, each in a separate subfolder. Create a bucket
38+
"yourcompany-charts". Create a subfolder in it for each repository you need, for
39+
example "stable" and "testing". Another option is to separate the repositories
40+
by the product or by group of services, for example "backoffice", "order-processing", etc.
41+
And again, never put any other files in the repository folder.

docs/usage-cost.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Usage pricing
2+
3+
I hope this document helps you to calculate the AWS S3 usage cost for your use case.
4+
5+
Disclaimer: the plugin author is not responsible for your unexpected expenses.
6+
7+
**Make sure to consult the pricing for your region [here](https://aws.amazon.com/s3/pricing)!**
8+
9+
## Reindex
10+
11+
The `helm s3 reindex <repo>` command is a much more expensive operation than others in
12+
this plugin. For example, reindexing a repository with 1000 chart files in it
13+
results in 1 GET (`ListObjects`) request and 1000 HEAD (`HeadObject`) requests.
14+
Plus it can make additional GET (`GetObject`) requests if it does not find the
15+
required metadata in the HEAD request response.
16+
17+
At the moment of writing this document the price for HEAD/GET requests in `eu-central-1` is `$0.0043 for 10 000 requests`.
18+
So the whole reindex operation for this case may cost approximately **$0.00043** or even **$0.00086**.
19+
This seems small, but multiple reindex operations per day may hurt your budget.

0 commit comments

Comments
 (0)