Skip to content

Commit bce15fc

Browse files
author
Stephane Royer
committed
feat: add AzureBlobOptions and IFileSaver interface; implement AzureStorageAccount provider and file handling
1 parent 91b9131 commit bce15fc

17 files changed

+432
-12
lines changed

documentation/docs/recipes/2_normalize.mdx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,14 +97,14 @@ The file would look like the following:
9797

9898
```csv title="post.csv"
9999
title,author,email,timestamp,category,link,post
100-
FundProcess features,Stéphane Royer,stephane.royer@fundprocess.lu,20210109113005,Category 2,https://www.fundprocess.lu/features/,
100+
coucou features,Stéphane Royer,stephane.royer@coucou.lu,20210109113005,Category 2,https://www.coucou.lu/features/,
101101
ETL.NET revealed,Paillave,[email protected],20210508181126,Category 2,,"This a post, about ETL.NET"
102102
ETL.NET page,Paillave,[email protected],20210504164510,Category 1,https://paillave.github.io/Etl.Net/,
103-
FundProcess presentation,Stéphane Royer,stephane.royer@fundprocess.lu,20210203124051,Category 2,,"This a ""post"", about FundProcess"
104-
FundProcess website,Stéphane Royer,stephane.royer@fundprocess.lu,20210106103005,Category 1,http://www.fundprocess.lu,
103+
coucou presentation,Stéphane Royer,stephane.royer@coucou.lu,20210203124051,Category 2,,"This a ""post"", about coucou"
104+
coucou website,Stéphane Royer,stephane.royer@coucou.lu,20210106103005,Category 1,http://www.coucou.lu,
105105
ETL.NET nuget,Paillave,[email protected],20200504164510,Category 1,http://www.nuget.org/packages/Etl.Net,
106106
ETL.NET information,Paillave,[email protected],20200518071024,Category 3,,"This ""another post"" about ETL.NET"
107-
FundProcess information,Stéphane Royer,stephane.royer@fundprocess.lu,20210819124550,Category 1,,This another post about FundProcess
107+
coucou information,Stéphane Royer,stephane.royer@coucou.lu,20210819124550,Category 1,,This another post about coucou
108108
```
109109

110110
The normalized structure where this file must be imported is this one:
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
using System.Collections;
2+
using Microsoft.Extensions.FileProviders;
3+
4+
namespace Paillave.Etl.AzureStorageAccountFileProvider;
5+
6+
/// <summary>
/// Directory listing backed by an in-memory list of <see cref="AzureBlobFileInfo"/> entries.
/// Every enumeration works on a fresh copy of that list.
/// </summary>
public class AzureBlobDirectoryContents : IDirectoryContents, IEnumerable<AzureBlobFileInfo>
{
    private readonly List<AzureBlobFileInfo> _blobs;

    /// <summary>Set to <c>true</c> by the constructor and never changed afterwards.</summary>
    public bool Exists { get; }

    /// <summary>Wraps the given entries.</summary>
    /// <param name="blobs">Entries exposed by this listing.</param>
    internal AzureBlobDirectoryContents(List<AzureBlobFileInfo> blobs)
    {
        _blobs = blobs;
        Exists = true;
    }

    /// <summary>Enumerates a copy of the entries as <see cref="IFileInfo"/>.</summary>
    public IEnumerator<IFileInfo> GetEnumerator()
    {
        var snapshot = _blobs.ToList();
        return snapshot.GetEnumerator();
    }

    /// <summary>Non-generic enumeration; forwards to <see cref="GetEnumerator"/>.</summary>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>Enumerates a copy of the entries with their concrete type.</summary>
    IEnumerator<AzureBlobFileInfo> IEnumerable<AzureBlobFileInfo>.GetEnumerator()
    {
        var snapshot = _blobs.ToList();
        return snapshot.GetEnumerator();
    }
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
using Azure.Storage.Blobs;
2+
using Azure.Storage.Blobs.Models;
3+
using Microsoft.Extensions.FileProviders;
4+
5+
namespace Paillave.Etl.AzureStorageAccountFileProvider;
6+
7+
/// <summary>
/// <see cref="IFileInfo"/> view over either a single blob or a virtual directory
/// (a blob-name prefix) of an Azure blob container.
/// </summary>
public class AzureBlobFileInfo : IFileInfo
{
    // Null when this instance represents a directory (prefix).
    private readonly BlobClient? _blobClient = null;
    // Null when this instance was created directly from a BlobClient.
    private readonly BlobContainerClient? _blobContainerClient = null;

    /// <summary>
    /// Builds a file entry from a blob client. The blob properties are fetched
    /// synchronously to fill <see cref="Length"/> and <see cref="LastModified"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">The properties response carries no value.</exception>
    internal AzureBlobFileInfo(BlobClient blobClient)
    {
        _blobClient = blobClient;

        var properties = blobClient.GetProperties()?.Value ?? throw new InvalidOperationException("Cannot get blob properties.");
        Name = blobClient.Name.Split('/').Last();
        Length = properties.ContentLength;
        LastModified = properties.LastModified;
        PhysicalPath = blobClient.Name;
    }

    /// <summary>
    /// Builds an entry from a hierarchical listing item: a directory when the item
    /// is a prefix, otherwise a file described by the listed blob properties.
    /// </summary>
    internal AzureBlobFileInfo(BlobContainerClient blobContainerClient, BlobHierarchyItem blobHierarchyItem)
    {
        _blobContainerClient = blobContainerClient;
        if (blobHierarchyItem.IsPrefix)
        {
            IsDirectory = true;
            // The trailing '/' is trimmed first so the last segment is the directory name.
            Name = blobHierarchyItem.Prefix.TrimEnd('/').Split('/').Last();
            PhysicalPath = blobHierarchyItem.Prefix;
        }
        else
        {
            _blobClient = blobContainerClient.GetBlobClient(blobHierarchyItem.Blob.Name);
            Name = blobHierarchyItem.Blob.Name.Split('/').Last();
            Length = blobHierarchyItem.Blob.Properties.ContentLength ?? 0;
            LastModified = blobHierarchyItem.Blob.Properties.LastModified ?? DateTimeOffset.MinValue;
            PhysicalPath = blobHierarchyItem.Blob.Name;
        }
    }

    /// <summary>Deletes the underlying blob.</summary>
    /// <exception cref="InvalidOperationException">This entry is a directory.</exception>
    public async Task DeleteAsync(CancellationToken cancellationToken = default)
    {
        if (_blobClient == null)
            throw new InvalidOperationException("Cannot delete a directory.");
        // ConfigureAwait(false): callers in this library block on the returned task.
        await _blobClient.DeleteAsync(cancellationToken: cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Replaces the content of the underlying blob with <paramref name="stream"/> and
    /// refreshes <see cref="Length"/> and <see cref="LastModified"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// This entry is a directory, or the properties response carries no value.
    /// </exception>
    public async Task SaveStreamAsync(Stream stream, CancellationToken cancellationToken = default)
    {
        if (_blobClient == null)
            throw new InvalidOperationException("Cannot save a stream for a directory.");
        // This entry always describes an existing blob, so the upload must be allowed
        // to overwrite; the overload without the flag refuses to replace a blob.
        await _blobClient.UploadAsync(stream, overwrite: true, cancellationToken: cancellationToken).ConfigureAwait(false);
        var response = await _blobClient.GetPropertiesAsync(cancellationToken: cancellationToken).ConfigureAwait(false);
        var properties = response?.Value ?? throw new InvalidOperationException("Cannot get blob properties.");
        Length = properties.ContentLength;
        LastModified = properties.LastModified;
    }

    /// <summary>
    /// Saves <paramref name="stream"/> as a blob named <see cref="PhysicalPath"/> followed by
    /// <paramref name="name"/>, through the container-level <c>SaveFileAsync</c> helper
    /// (defined elsewhere; the <c>true</c> argument appears to be its overwrite flag).
    /// </summary>
    /// <exception cref="InvalidOperationException">This entry is not a directory.</exception>
    public Task<AzureBlobFileInfo> SaveStream(string name, Stream stream, CancellationToken cancellationToken = default)
    {
        // File entries built from a listing also hold a container client, so the
        // directory flag must be checked too; otherwise the target would be "<blob name><name>".
        if (_blobContainerClient == null || !IsDirectory)
            throw new InvalidOperationException("Cannot save a stream for a file.");
        return _blobContainerClient.SaveFileAsync($"{PhysicalPath}{name}", stream, true, null, cancellationToken);
    }

    /// <summary>Opens the blob content for reading.</summary>
    /// <exception cref="InvalidOperationException">This entry is a directory.</exception>
    public Stream CreateReadStream() => _blobClient != null
        ? _blobClient.OpenRead()
        : throw new InvalidOperationException("Cannot open a stream for a directory!");

    /// <summary>Always <c>true</c>.</summary>
    public bool Exists => true;

    /// <summary>Content length in bytes; left at 0 for directories.</summary>
    public long Length { get; private set; }

    /// <summary>Full blob name for a file, or the prefix for a directory.</summary>
    public string? PhysicalPath { get; }

    /// <summary>Last '/'-separated segment of the blob name or prefix.</summary>
    public string Name { get; }

    /// <summary>Last modification time; left at its default for directories.</summary>
    public DateTimeOffset LastModified { get; private set; }

    /// <summary><c>true</c> when this entry was built from a prefix item.</summary>
    public bool IsDirectory { get; }
}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
using Azure.Storage.Blobs;
2+
using Microsoft.Extensions.FileProviders;
3+
using Microsoft.Extensions.Primitives;
4+
5+
namespace Paillave.Etl.AzureStorageAccountFileProvider;
6+
/// <summary>
/// <see cref="IFileProvider"/> and <see cref="IFileSaver"/> implementation that forwards
/// every operation to a <see cref="BlobContainerClient"/> obtained from the options.
/// </summary>
public class AzureBlobFileProvider : IFileProvider, IFileSaver
{
    private readonly BlobContainerClient _blobContainerClient;

    /// <summary>Creates the provider from the container client the options resolve to.</summary>
    public AzureBlobFileProvider(AzureBlobOptions azureBlobOptions)
    {
        _blobContainerClient = azureBlobOptions.GetBlobContainerClient();
    }

    /// <summary>Lists the entries found under <paramref name="subpath"/>.</summary>
    public IDirectoryContents GetDirectoryContents(string subpath)
    {
        return _blobContainerClient.GetDirectoryContents(subpath);
    }

    /// <summary>Returns the entry located at <paramref name="subpath"/>.</summary>
    public IFileInfo GetFileInfo(string subpath)
    {
        return _blobContainerClient.GetFileInfo(subpath);
    }

    /// <summary>Saves <paramref name="stream"/> at <paramref name="subpath"/> and returns the saved entry.</summary>
    public async Task<IFileInfo> SaveFileAsync(string subpath, Stream stream, CancellationToken cancellationToken = default)
    {
        var saved = await _blobContainerClient.SaveFileAsync(subpath, stream, true, null, cancellationToken);
        return saved;
    }

    /// <summary>Deletes the entry located at <paramref name="subpath"/>.</summary>
    public Task DeleteFileAsync(string subpath, CancellationToken cancellationToken = default)
    {
        var target = _blobContainerClient.GetFileInfo(subpath);
        return target.DeleteAsync(cancellationToken);
    }

    /// <summary>
    /// Returns a change token for <paramref name="filter"/>. The token never reports a change.
    /// </summary>
    public IChangeToken Watch(string filter)
    {
        return new AzureBlobFileProviderChangeToken(filter, _blobContainerClient);
    }

    /// <summary>
    /// Change token that records registered callbacks but never signals:
    /// both <see cref="HasChanged"/> and <see cref="ActiveChangeCallbacks"/> are constant <c>false</c>.
    /// </summary>
    private class AzureBlobFileProviderChangeToken : IChangeToken
    {
        private readonly BlobContainerClient _blobContainerClient;
        private readonly string _filter;
        private readonly List<Action<object>> _callbacks = new();

        public AzureBlobFileProviderChangeToken(string filter, BlobContainerClient blobContainerClient)
        {
            _filter = filter;
            _blobContainerClient = blobContainerClient;
        }

        public bool ActiveChangeCallbacks => false;

        public bool HasChanged => false;

        /// <summary>Stores the callback; disposing the returned handle removes it again.</summary>
        public IDisposable RegisterChangeCallback(Action<object?> callback, object? state)
        {
            _callbacks.Add(callback);
            return new DisposableAction(() => { _callbacks.Remove(callback); });
        }
    }

    /// <summary>Runs the supplied action each time <see cref="Dispose"/> is called.</summary>
    private class DisposableAction : IDisposable
    {
        private readonly Action _action;

        public DisposableAction(Action action)
        {
            _action = action;
        }

        public void Dispose()
        {
            _action();
        }
    }
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
namespace Paillave.Etl.AzureStorageAccountFileProvider;
2+
3+
/// <summary>
/// Connection settings for an Azure blob container. How these values are combined
/// is decided by <c>GetBlobContainerClient</c>, which is defined elsewhere.
/// </summary>
public class AzureBlobOptions
{
    /// <summary>Optional base URI; presumably the storage endpoint (to be confirmed against the client factory).</summary>
    public Uri? BaseUri { get; set; }

    /// <summary>Optional flag; presumably selects default Azure credentials (to be confirmed against the client factory).</summary>
    public bool? DefaultAzureCredential { get; set; }

    /// <summary>Name of the document container. Must be set at initialization.</summary>
    public required string DocumentContainer { get; set; }

    /// <summary>Optional connection string.</summary>
    public string? ConnectionString { get; set; }
}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
using Paillave.Etl.Core;
2+
3+
namespace Paillave.Etl.AzureStorageAccountFileProvider;
4+
5+
/// <summary>
/// Adapter exposing the Azure storage account file provider and processor
/// under the name <c>AzureStorageAccount</c>.
/// </summary>
public class AzureStorageAccountProviderProcessorAdapter
    : ProviderProcessorAdapterBase<
        AzureBlobOptions,
        AzureStorageAccountAdapterProviderParameters,
        AzureStorageAccountAdapterProcessorParameters>
{
    /// <summary>Human readable description of the adapter.</summary>
    public override string Description
    {
        get { return "Get file and create document in azure storage account"; }
    }

    /// <summary>Name of the adapter.</summary>
    public override string Name
    {
        get { return "AzureStorageAccount"; }
    }

    /// <summary>Creates the file value provider for the given connection and input parameters.</summary>
    protected override IFileValueProvider CreateProvider(
        string code,
        string name,
        string connectionName,
        AzureBlobOptions connectionParameters,
        AzureStorageAccountAdapterProviderParameters inputParameters)
    {
        return new AzureStorageAccountFileValueProvider(
            code, name, connectionName, connectionParameters, inputParameters);
    }

    /// <summary>Creates the file value processor for the given connection and output parameters.</summary>
    protected override IFileValueProcessor CreateProcessor(
        string code,
        string name,
        string connectionName,
        AzureBlobOptions connectionParameters,
        AzureStorageAccountAdapterProcessorParameters outputParameters)
    {
        return new AzureStorageAccountFileValueProcessor(
            code, name, connectionName, connectionParameters, outputParameters);
    }
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
using Paillave.Etl.Core;
2+
3+
namespace Paillave.Etl.AzureStorageAccountFileProvider;
4+
5+
/// <summary>
/// File value backed by an <see cref="AzureBlobFileInfo"/>: content is read from,
/// and deletion is forwarded to, that entry.
/// </summary>
public class AzureStorageAccountFileValue : FileValueBase<AzureStorageAccountFileValueMetadata>
{
    private readonly AzureBlobOptions _azureBlobOptions;
    private readonly AzureBlobFileInfo _fileInfo;

    /// <summary>Creates the file value and its metadata from the blob entry and the connection details.</summary>
    public AzureStorageAccountFileValue(AzureBlobFileInfo fileInfo, string connectorCode, string connectorName, string connectionName, AzureBlobOptions azureBlobOptions)
        : base(BuildMetadata(fileInfo, connectorCode, connectorName, connectionName, azureBlobOptions))
    {
        _fileInfo = fileInfo;
        _azureBlobOptions = azureBlobOptions;
    }

    /// <summary>Name of the underlying blob entry.</summary>
    public override string Name
    {
        get { return _fileInfo.Name; }
    }

    /// <summary>Opens a read stream on the underlying blob entry.</summary>
    public override Stream GetContent()
    {
        return _fileInfo.CreateReadStream();
    }

    /// <summary>Wraps the stream returned by <see cref="GetContent"/>.</summary>
    public override StreamWithResource OpenContent()
    {
        return new StreamWithResource(GetContent());
    }

    /// <summary>Deletes the underlying blob, blocking until the deletion completes.</summary>
    protected override void DeleteFile()
    {
        _fileInfo.DeleteAsync().Wait();
    }

    // Assembles the metadata handed to the base constructor.
    private static AzureStorageAccountFileValueMetadata BuildMetadata(
        AzureBlobFileInfo fileInfo,
        string connectorCode,
        string connectorName,
        string connectionName,
        AzureBlobOptions azureBlobOptions)
    {
        return new AzureStorageAccountFileValueMetadata
        {
            BaseUri = azureBlobOptions.BaseUri,
            Name = fileInfo.Name,
            Folder = fileInfo.PhysicalPath,
            DocumentContainer = azureBlobOptions.DocumentContainer,
            ConnectionName = connectionName,
            ConnectorCode = connectorCode,
            ConnectorName = connectorName,
        };
    }
}
36+
/// <summary>Metadata attached to a file value read from an Azure storage account.</summary>
public class AzureStorageAccountFileValueMetadata : FileValueMetadataBase
{
    /// <summary>Base URI copied from the connection options, if any.</summary>
    public Uri? BaseUri { get; set; }

    /// <summary>Location of the file; the file value fills it with the entry's physical path.</summary>
    public string? Folder { get; set; }

    /// <summary>Name of the file. Must be set at initialization.</summary>
    public required string Name { get; set; }

    /// <summary>Name of the document container, if any.</summary>
    public string? DocumentContainer { get; set; }
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
using Paillave.Etl.Core;
2+
3+
namespace Paillave.Etl.AzureStorageAccountFileProvider;
4+
5+
/// <summary>Parameters of the Azure storage account file processor.</summary>
public class AzureStorageAccountAdapterProcessorParameters
{
    /// <summary>Optional folder prepended to the name of each saved file.</summary>
    public string? SubFolder { get; set; }

    /// <summary>
    /// Passed to the save operation as its overwrite flag; <c>null</c> is treated as <c>false</c>.
    /// Defaults to <c>false</c>.
    /// </summary>
    public bool? OverwriteIfAlreadyExists { get; set; } = false;
}
10+
11+
/// <summary>
/// Processor that saves every incoming file value into an Azure blob container
/// and then pushes the file value downstream unchanged.
/// </summary>
public class AzureStorageAccountFileValueProcessor : FileValueProcessorBase<AzureBlobOptions, AzureStorageAccountAdapterProcessorParameters>
{
    public AzureStorageAccountFileValueProcessor(string code, string name, string connectionName, AzureBlobOptions connectionParameters, AzureStorageAccountAdapterProcessorParameters processorParameters)
        : base(code, name, connectionName, connectionParameters, processorParameters) { }

    public override ProcessImpact PerformanceImpact => ProcessImpact.Average;
    public override ProcessImpact MemoryFootPrint => ProcessImpact.Light;

    /// <summary>
    /// Saves the content of <paramref name="fileValue"/> under the configured sub folder,
    /// together with the metadata extracted from it, then pushes the file value.
    /// Blocks until the save has completed.
    /// </summary>
    protected override void Process(IFileValue fileValue, AzureBlobOptions connectionParameters, AzureStorageAccountAdapterProcessorParameters processorParameters, Action<IFileValue> push, CancellationToken cancellationToken, IExecutionContext context)
    {
        IDictionary<string, string>? metadata = ExtractMetadataRecursively(fileValue.Metadata);
        var blobContainerClient = connectionParameters.GetBlobContainerClient();
        var subpath = BuildSubPath(processorParameters.SubFolder, fileValue.Name);
        // The content stream is owned by this method: release it once the save is done.
        using var content = fileValue.GetContent();
        blobContainerClient.SaveFileAsync(
            subpath,
            content,
            processorParameters.OverwriteIfAlreadyExists ?? false,
            metadata,
            cancellationToken).Wait();
        push(fileValue);
    }

    /// <summary>
    /// Collects the public properties of <c>metadata.Properties</c> as name/string pairs,
    /// leaving out null values and values whose string form is blank.
    /// </summary>
    /// <returns>
    /// <c>null</c> when <paramref name="metadata"/> is null; otherwise a dictionary,
    /// empty when there is nothing to collect.
    /// </returns>
    private IDictionary<string, string>? ExtractMetadataRecursively(IFileValueMetadata metadata)
    {
        if (metadata == null)
            return null;
        var result = new Dictionary<string, string>();
        if (metadata.Properties != null)
        {
            foreach (var property in metadata.Properties.GetType().GetProperties())
            {
                // Indexers and write-only properties cannot be read without arguments;
                // calling GetValue on them would throw.
                if (!property.CanRead || property.GetIndexParameters().Length > 0)
                    continue;
                var value = property.GetValue(metadata.Properties);
                if (value != null)
                {
                    var stringValue = value.ToString();
                    if (!string.IsNullOrWhiteSpace(stringValue))
                        result[property.Name] = stringValue;
                }
            }
        }
        return result;
    }

    /// <summary>
    /// Checks the connection by saving an empty blob with a random name under the
    /// configured sub folder and deleting it right away.
    /// </summary>
    protected override void Test(AzureBlobOptions connectionParameters, AzureStorageAccountAdapterProcessorParameters processorParameters)
    {
        var blobContainerClient = connectionParameters.GetBlobContainerClient();
        var fileValueName = Guid.NewGuid().ToString();
        var subPath = BuildSubPath(processorParameters.SubFolder, fileValueName);
        using var ms = new MemoryStream();
        blobContainerClient.SaveFileAsync(subPath, ms).Wait();
        blobContainerClient.GetBlobClient(subPath).Delete();
    }

    // Joins the optional sub folder (trailing '/' removed) and the file name with a single '/'.
    private static string BuildSubPath(string? subFolder, string fileName)
        => string.IsNullOrWhiteSpace(subFolder)
            ? fileName
            : $"{subFolder.TrimEnd('/')}/{fileName}";
}
73+
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
using Microsoft.Extensions.FileSystemGlobbing;
2+
using Paillave.Etl.Core;
3+
4+
namespace Paillave.Etl.AzureStorageAccountFileProvider;
5+
6+
/// <summary>Parameters of the Azure storage account file provider.</summary>
public class AzureStorageAccountAdapterProviderParameters
{
    /// <summary>Optional folder to list; an empty path is used when it is <c>null</c>.</summary>
    public string? SubFolder { get; set; }

    /// <summary>Optional glob pattern applied to file names; <c>*</c> is used when it is null or empty.</summary>
    public string? FileNamePattern { get; set; }
}
11+
12+
/// <summary>
/// Provider that lists a folder of an Azure blob container and pushes one file value
/// per file whose name matches the configured pattern.
/// </summary>
public class AzureStorageAccountFileValueProvider : FileValueProviderBase<AzureBlobOptions, AzureStorageAccountAdapterProviderParameters>
{
    public AzureStorageAccountFileValueProvider(
        string code,
        string name,
        string connectionName,
        AzureBlobOptions connectionParameters,
        AzureStorageAccountAdapterProviderParameters inputParameters)
        : base(code, name, connectionName, connectionParameters, inputParameters)
    {
    }

    public override ProcessImpact PerformanceImpact
    {
        get { return ProcessImpact.Average; }
    }

    public override ProcessImpact MemoryFootPrint
    {
        get { return ProcessImpact.Light; }
    }

    /// <summary>
    /// Lists the configured sub folder and pushes every non-directory entry whose name
    /// matches the file name pattern. Stops as soon as cancellation is requested.
    /// </summary>
    protected override void Provide(
        Action<IFileValue> pushFileValue,
        AzureBlobOptions connectionParameters,
        AzureStorageAccountAdapterProviderParameters providerParameters,
        CancellationToken cancellationToken,
        IExecutionContext context)
    {
        var searchPattern = providerParameters.FileNamePattern;
        if (string.IsNullOrEmpty(searchPattern))
        {
            // No pattern configured: accept every file name.
            searchPattern = "*";
        }

        var matcher = new Matcher().AddInclude(searchPattern);
        var blobContainerClient = connectionParameters.GetBlobContainerClient();
        var folder = providerParameters.SubFolder ?? string.Empty;
        var entries = blobContainerClient.GetDirectoryContents(folder, cancellationToken).Cast<AzureBlobFileInfo>();

        foreach (var entry in entries)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                break;
            }

            if (entry.IsDirectory)
            {
                continue;
            }

            if (!matcher.Match(entry.Name).HasMatches)
            {
                continue;
            }

            pushFileValue(new AzureStorageAccountFileValue(entry, Code, Name, ConnectionName, connectionParameters));
        }
    }

    /// <summary>Checks the connection by requesting the listing of the configured sub folder.</summary>
    protected override void Test(AzureBlobOptions connectionParameters, AzureStorageAccountAdapterProviderParameters inputParameters)
    {
        var folder = inputParameters.SubFolder ?? string.Empty;
        connectionParameters.GetBlobContainerClient().GetDirectoryContents(folder);
    }
}

0 commit comments

Comments
 (0)