From 3542350909b1213240e5097a1793a7c0733f6196 Mon Sep 17 00:00:00 2001 From: Fabian Mastenbroek Date: Tue, 13 Dec 2022 23:59:47 +0000 Subject: fix(trace/wtf): Disable Parquet strict typing This change fixes an issue where some of the traces from the Workflow Trace Archive would fail to load with the trace format in OpenDC. This was caused by one of the fields being stored as a double, while the format expects it to be a long. Parquet does not support unioning primitive types. Therefore, we have to disable strict type checking when reading the file. Furthermore, we need to support double entries for storing the workflow ids. --- .../test/resources/shell/workload/schema-1.0/generic_information.json | 1 + 1 file changed, 1 insertion(+) create mode 100755 opendc-trace/opendc-trace-wtf/src/test/resources/shell/workload/schema-1.0/generic_information.json (limited to 'opendc-trace/opendc-trace-wtf/src/test/resources/shell/workload') diff --git a/opendc-trace/opendc-trace-wtf/src/test/resources/shell/workload/schema-1.0/generic_information.json b/opendc-trace/opendc-trace-wtf/src/test/resources/shell/workload/schema-1.0/generic_information.json new file mode 100755 index 00000000..5949ab59 --- /dev/null +++ b/opendc-trace/opendc-trace-wtf/src/test/resources/shell/workload/schema-1.0/generic_information.json @@ -0,0 +1 @@ +{"total_workflows": 3403, "total_tasks": 10208, "domain": "Industrial", "date_start": null, "date_end": null, "num_sites": 3403, "num_resources": 10208.0, "num_users": 1, "num_groups": 1, "total_resource_seconds": 89229.863, "authors": ["Shenjun Ma", "Alexey Ilyushkin", "Alexander Stegehuis", "Alexandru Iosup"], "min_resource_task": 1.0, "max_resource_task": 1.0, "std_resource_task": 0.0, "mean_resource_task": 1.0, "median_resource_task": 1.0, "first_quartile_resource_task": 1.0, "third_quartile_resource_task": 1.0, "cov_resource_task": 0.0, "min_memory": -1, "max_memory": -1, "std_memory": 0.0, "mean_memory": -1.0, "median_memory": -1, 
"first_quartile_memory": -1, "third_quartile_memory": -1, "cov_memory": -0.0, "min_network_usage": -1, "max_network_usage": -1, "std_network_usage": 0.0, "mean_network_usage": -1.0, "median_network_usage": -1, "first_quartile_network_usage": -1, "third_quartile_network_usage": -1, "cov_network_usage": -0.0, "min_disk_space_usage": -1, "max_disk_space_usage": -1, "std_disk_space_usage": 0.0, "mean_disk_space_usage": -1.0, "median_disk_space_usage": -1, "first_quartile_disk_space_usage": -1, "third_quartile_disk_space_usage": -1, "cov_disk_space_usage": -0.0, "min_energy": -1, "max_energy": -1, "std_energy": 0.0, "mean_energy": -1.0, "median_energy": -1, "first_quartile_energy": -1, "third_quartile_energy": -1, "cov_energy": -0.0, "workload_description": "Chronos is a trace from Shell's Chronos IoT production system. It contains pipelines where sensor data is obtained, checked if values are within range (e.g. temperature, operational status, etc.), and the outcomes are written to persistent storage."} \ No newline at end of file -- cgit v1.2.3