% Conference paper from SocProS 2023, published in the Lecture Notes in
% Networks and Systems series. The `language` and `address` values are given
% in English (the exported record carried the Spanish-locale strings
% "Ingl\'es" and "Alemania"). NOTE(review): `address` should ideally be the
% publisher's city; only the country is known from the record -- confirm.
@inproceedings{c2909602b0de4163b15df11f0940eeee,
  author    = {Surya, Y. and Hegde, Sumanth and Shetty, Jyothi and Shobha, G. and Camper, Dan},
  title     = {Auto-Detection of Field-Level Dependencies in Data Workflow on a Distributed Platform},
  booktitle = {Proceedings of the 12th International Conference on Soft Computing for Problem Solving - {SocProS} 2023},
  editor    = {Pant, Millie and Deep, Kusum and Nagar, Atulya},
  series    = {Lecture Notes in Networks and Systems},
  pages     = {373--385},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2024},
  doi       = {10.1007/978-981-97-3180-0_24},
  isbn      = {9789819731794},
  language  = {English},
  keywords  = {Big data, Distributed systems, Graph Representation, HPCC systems, Parser},
  abstract  = {With the observed significant rise in the use of data across a variety of industries in the modern world, distributed systems are now required to process and consume Big Data. HPCC (High-Performance Computing Clusters) system is an open-source data lake platform built for high-speed large-volume data engineering. Enterprise Control Language (ECL) is a declarative language specifically designed for huge data projects on the HPCC system platform. Large amounts of data are processed on a regular basis using HPCC systems, In the proposed work an approach to understand and interpret the data flow within an ECL program is investigated. The current system renders an XML graph, which shows the operations at dataset level which can be viewed in the ECL Watch, an interactive web application developed by HPCC systems. As the data changes of individual fields within dataset are not represented, the proposed work field level data and dependencies within datasets are tracked and their changes and operations are visualized as a directed acyclic graph for a generic ECL program to understand its data workflow. The core of this project relies on parsing the ECL IR (Intermediate Representation) emitted by the ECL compiler. The IR generated is transformed into graphical format. The system was tested against sample ECL programs available in ECL watch and other programs available in the platform regression tests and it provided a simple easy to comprehend data flow visualization.},
  note      = {Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd. 2024.; 12th International Conference on Soft Computing for Problem Solving, SocProS 2023 ; Conference date: 10-08-2023 Through 12-08-2023},
}