Skip to content

Commit f9e012c

Browse files
committed
feat(readme): add documentation on new metorikkuTester feature for csv output mocks
1 parent d973f68 commit f9e012c

File tree

2 files changed

+28
-11
lines changed

2 files changed

+28
-11
lines changed

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,29 @@ And the corresponding `mocks/table_1.jsonl`:
136136
{ "id": 1, "name": "test3" }
137137
```
138138

139+
Note that the expected output can also be provided as CSV files. In this case, use the `testsFiles` option instead of `tests`, like this:
140+
141+
```yaml
142+
metric: "/path/to/metric"
143+
mocks:
144+
- name: table_1
145+
path: mocks/table_1.jsonl
146+
testsFiles:
147+
- name: expected
148+
path: "/path/to/expected.csv"
149+
keys:
150+
df2:
151+
- id
152+
- name
153+
```
154+
155+
The CSV file at `/path/to/expected.csv` would then look like this:
156+
```csv
157+
id,name
158+
200,test
159+
300,test2
160+
```
161+
139162
The `keys` section allows the user to define the unique columns of each DataFrame's expected results -
140163
every expected row should have a unique combination of values in the key columns.
141164
This part is optional and can be used to define only part of the expected DataFrames -

src/main/scala/com/yotpo/metorikku/test/Tester.scala

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -105,22 +105,19 @@ case class Tester(config: TesterConfig) {
105105
var errors = Array[ErrorMessage]()
106106
val metricExpectedTests = getExpected(config, job.sparkSession)
107107
val configuredKeys = config.test.keys
108-
109108
val invalidSchemaMap = getTableNameToInvalidRowStructureIndexes(metricExpectedTests)
110109
if (invalidSchemaMap.nonEmpty) return getInvalidSchemaErrors(invalidSchemaMap)
111110

112111
metricExpectedTests.keys.foreach(tableName => {
113112
val actualResultsDf = extractTableContents(job.sparkSession, tableName, config.test.outputMode.get)
114-
115113
val (expectedResults, actualResults) = (metricExpectedTests(tableName), TestUtil.getRowsFromDf(actualResultsDf))
116114
val (expectedResultsObjects, actualResultsObjects) = (EnrichedRows(expectedResults), EnrichedRows(actualResults))
117115
val tableNameToAllExpectedColumns = metricExpectedTests.mapValues(v => v.head.keys.toList)
118116
val allExpectedColumns = tableNameToAllExpectedColumns.getOrElse(tableName, List[String]())
119-
val (isConfiguredKeysValid, keys) = getConfiguredKeysValidToTableKeys(configuredKeys, tableName, allExpectedColumns, tableName)
117+
val (isConfiguredKeysValid, tableKeys) = getConfiguredKeysValidToTableKeys(configuredKeys, tableName, allExpectedColumns, tableName)
120118
if (!isConfiguredKeysValid) {
121-
return getInvalidKeysNonExistingErrors(allExpectedColumns, keys, tableName)
119+
return getInvalidKeysNonExistingErrors(allExpectedColumns, tableKeys, tableName)
122120
}
123-
val tableKeys = keys
124121
val keyColumns = KeyColumns(tableKeys)
125122
val (expectedKeys, actualKeys) = (keyColumns.getKeysMapFromRows(expectedResults), keyColumns.getKeysMapFromDF(actualResultsDf))
126123
val (expectedResultsDuplications, actualResultsDuplications) = (TestUtil.getDuplicatedRowToIndexes(expectedKeys),
@@ -133,16 +130,13 @@ case class Tester(config: TesterConfig) {
133130
Array[ErrorMessage](new DuplicatedHeaderErrorMessage()) ++
134131
ErrorMessage.getErrorMessagesByDuplications(ResultsType.expected, expectedResultsDuplications, printableExpectedResults, tableName, keyColumns) ++
135132
ErrorMessage.getErrorMessagesByDuplications(ResultsType.actual, actualResultsDuplications, printableActualResults, tableName, keyColumns)
136-
137133
case _ =>
138134
val sorter = TesterSortData(tableKeys)
139135
if (expectedKeys.sortWith(sorter.sortStringRows).deep != actualKeys.sortWith(sorter.sortStringRows).deep) {
140136
val (expErrorIndexes, actErrorIndexes) = compareKeys(expectedKeys, actualKeys)
141-
ErrorMessage.getErrorMessageByMismatchedKeys(printableExpectedResults, printableActualResults,
142-
expErrorIndexes, actErrorIndexes, keyColumns, tableName)
143-
} else {
144-
ErrorMessage.getErrorMessagesByMismatchedAllCols(tableKeys, printableExpectedResults, printableActualResults, job.sparkSession, tableName)
145-
}
137+
ErrorMessage.getErrorMessageByMismatchedKeys(printableExpectedResults, printableActualResults, expErrorIndexes, actErrorIndexes,
138+
keyColumns, tableName)
139+
} else {ErrorMessage.getErrorMessagesByMismatchedAllCols(tableKeys, printableExpectedResults, printableActualResults, job.sparkSession, tableName)}
146140
}
147141
if (tableErrorDataArr.nonEmpty) {
148142
errors ++= tableErrorDataArr

0 commit comments

Comments
 (0)