/* * SPDX-License-Identifier: AGPL-3.0-or-later * Copyright (C) 2025 Sergej Görzen * This file is part of OmiLAXR. */ using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Text.RegularExpressions; using Newtonsoft.Json.Linq; using OmiLAXR.Extensions; namespace OmiLAXR.Utils { ///

/// Delegate for custom header name formatting during CSV processing. /// Allows transformation of header names for consistency or compliance requirements. ///

/// <param name="header">Original header name</param>
/// <returns>Formatted header name</returns>
public delegate string HeaderFormatter(string header);

///

/// Comprehensive CSV data manipulation class with advanced features for analytics data processing. /// Supports dynamic headers, regex filtering, JSON conversion, and performance-optimized operations. /// Designed for handling large datasets common in learning analytics and data export scenarios. ///

public class CsvFormat { ///

/// Initializes a new CsvFormat instance with specified headers. /// Creates empty row collection ready for data population. ///

/// <param name="headers">Initial column headers for the CSV; a null array is treated as "no headers"</param>
public CsvFormat(params string[] headers)
{
    // A params array can be passed as an explicit null. Fall back to an empty
    // header list (SetHeaders is equally tolerant) instead of throwing from ToList().
    Headers = headers?.ToList() ?? new List<string>();
    Rows = new List<List<object>>();

    // Index the initial headers so name -> column lookups are available immediately.
    RebuildHeaderIndexCache();
}

///

/// Initializes a new CsvFormat instance with pre-allocated row capacity. /// Optimizes memory usage for scenarios with known data size. ///

/// <param name="rowsCapacity">Expected number of rows for memory pre-allocation</param>
/// <param name="headers">Initial column headers for the CSV; a null array is treated as "no headers"</param>
public CsvFormat(int rowsCapacity, params string[] headers)
{
    // A params array can be passed as an explicit null. Fall back to an empty
    // header list (SetHeaders is equally tolerant) instead of throwing from ToList().
    Headers = headers?.ToList() ?? new List<string>();

    // Pre-allocate the outer row list; rowsCapacity is handed straight to the List constructor.
    Rows = new List<List<object>>(rowsCapacity);

    // Index the initial headers so name -> column lookups are available immediately.
    RebuildHeaderIndexCache();
}

///

/// Dynamic list of column headers that can be modified during data processing. /// Automatically indexed for fast lookup operations. ///

/// <remarks>
/// Only this class can replace the list instance (private setter), but the returned list is
/// mutable. Edits made directly on it do not go through <c>RebuildHeaderIndexCache</c>.
/// </remarks>
public List<string> Headers { get; private set; }

///

/// Collection of data rows, each containing objects corresponding to the headers. /// Supports heterogeneous data types with automatic string conversion during output. ///

/// <remarks>
/// Get-only: the list instance is assigned once by the constructors and never replaced.
/// Each inner list holds one row's cell values.
/// </remarks>
public List<List<object>> Rows { get; }

///

/// Column separator string used in CSV output formatting.
/// Defaults to a comma but can be changed for other delimiter formats.
///

/// <remarks>Declared as a string rather than a char; initialised to a single comma.</remarks>
public string Separator { get; set; } = ",";

///

/// Regular expression pattern for including specific headers in output. /// When set, only headers matching this pattern will be included in generated CSV. ///

/// <remarks>
/// Has no initializer, so it is null until assigned. <c>AddRow</c> treats null as
/// "no include filter" when deciding whether a new key becomes a header.
/// </remarks>
public Regex IncludedHeaderPattern { get; set; }

///

/// Regular expression pattern for excluding specific headers from output. /// When set, headers matching this pattern will be excluded from generated CSV. ///

/// <remarks>
/// Has no initializer, so it is null until assigned. <c>AddRow</c> treats null as
/// "no exclude filter" when deciding whether a new key becomes a header.
/// </remarks>
public Regex ExcludedHeaderPattern { get; set; }

///

/// Performance optimization cache mapping header names to their column indices. /// Rebuilt automatically when headers are modified. ///

/// <remarks>
/// Key: header name. Value: zero-based position of that header in <c>Headers</c>.
/// Assigned by <c>RebuildHeaderIndexCache</c>.
/// </remarks>
private Dictionary<string, int> _headerIndexCache;

///

/// Rebuilds the internal header index cache for fast column lookups. /// Called automatically when headers are modified to maintain performance. ///

/// <remarks>
/// When the same header name occurs more than once, the last occurrence wins because
/// entries are written through the dictionary indexer. A null header name makes the
/// dictionary throw; in that case the previously published cache is left untouched.
/// </remarks>
private void RebuildHeaderIndexCache()
{
    // Build into a local first so a failure part-way through never publishes a half-filled map.
    var rebuilt = new Dictionary<string, int>(Headers.Count);

    for (var column = 0; column < Headers.Count; column++)
    {
        rebuilt[Headers[column]] = column;
    }

    _headerIndexCache = rebuilt;
}

///

/// Gets the header index cache, rebuilding it if necessary.
/// The cache is rebuilt on access when it is missing or when its size no longer matches the header count.
///

/// <remarks>
/// Staleness is detected only by comparing entry counts, so a change that keeps the
/// number of headers the same is not noticed by this getter.
/// </remarks>
private Dictionary<string, int> HeaderIndexCache
{
    get
    {
        // Usable only if the cache exists and has exactly one entry per header.
        var upToDate = _headerIndexCache != null
                       && _headerIndexCache.Count == Headers.Count;

        if (!upToDate)
        {
            RebuildHeaderIndexCache();
        }

        return _headerIndexCache;
    }
}

///

/// Sets new headers for the CSV, replacing any existing ones. /// Automatically rebuilds the index cache for optimal performance. ///

/// <param name="headers">New array of header names; null yields an empty header list</param>
public void SetHeaders(params string[] headers)
{
    if (headers == null)
    {
        // No headers supplied: start from an empty list with room for 50 entries.
        Headers = new List<string>(50);
    }
    else
    {
        // Copy the array so later changes to the caller's array do not affect this instance.
        Headers = headers.ToList();
    }

    RebuildHeaderIndexCache();
}

///

/// Applies custom formatting to all headers using the provided formatter function. /// Useful for standardizing header names or applying naming conventions. ///

/// <param name="formatter">Function to transform each header name</param>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="formatter"/> is null</exception>
public void FormatHeaders(HeaderFormatter formatter)
{
    if (formatter == null)
        throw new ArgumentNullException(nameof(formatter));

    Headers = Headers.Select(h => formatter(h)).ToList();

    // The names changed but the count did not, so the count-based staleness check in
    // HeaderIndexCache cannot detect this. Rebuild explicitly so lookups use the new names.
    RebuildHeaderIndexCache();
}

///

/// Removes all data rows while preserving headers and structure. /// Efficient way to reset data content without recreating the entire object. ///

/// <remarks>Empties <c>Rows</c> in place; <c>Headers</c> is not touched.</remarks>
public void ClearRows() => Rows.Clear();

///

/// Renames an existing header to a new name with validation. /// Updates the index cache automatically to maintain performance. ///

/// <param name="oldName">Current header name to replace</param>
/// <param name="newName">New name for the header</param>
/// <exception cref="ArgumentException">Thrown if the old header name is not found</exception>
/// <exception cref="ArgumentNullException">Thrown if <paramref name="newName"/> is null</exception>
public void RenameHeader(string oldName, string newName)
{
    if (!HeaderIndexCache.TryGetValue(oldName, out var index))
        throw new ArgumentException($"Header '{oldName}' not found.");

    // Reject a null name before touching Headers: the index cache cannot hold a null key,
    // so writing it first would leave the header list in a state that can never be re-indexed.
    if (newName == null)
        throw new ArgumentNullException(nameof(newName));

    Headers[index] = newName;

    // Same header count, different name: the cache must be refreshed explicitly.
    RebuildHeaderIndexCache();
}

///

/// Adds a new data row from a dictionary of key-value pairs. /// Automatically expands headers if new keys are encountered (subject to regex filters). /// Handles missing values gracefully by using null placeholders. ///

/// Dictionary mapping header names to values public void AddRow(Dictionary values) { var headerChanged = false; // Check for new headers that should be added foreach (var key in values.Keys) { // Apply include/exclude regex patterns for header filtering var include = (IncludedHeaderPattern == null || IncludedHeaderPattern.IsMatch(key)) && (ExcludedHeaderPattern == null || !ExcludedHeaderPattern.IsMatch(key)); // Add new header if it passes filtering and doesn't already exist if (include && !Headers.Contains(key)) { Headers.Add(key); headerChanged = true; } } // Rebuild index cache if headers were modified if (headerChanged) RebuildHeaderIndexCache(); // Create row with values corresponding to current headers var row = new List