/// <summary>
/// Click handler: lets the user pick a CSV file, counts how often each token occurs in the
/// first six columns of every data row, and writes a tab-separated frequency report.
/// </summary>
/// <remarks>
/// The first line of the file is treated as a header and skipped. Rows are split on ','
/// only (quoted fields containing commas are NOT handled), and rows with fewer than six
/// columns are ignored. Within a cell, underscores, spaces and tabs separate tokens.
/// Tokens are counted case-insensitively; the spelling reported is the first one encountered.
/// The report is written to the input path with "_wordnet_token_frequency_report.txt"
/// appended, sorted by descending frequency.
/// </remarks>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button_FREQUENCY_DISTRIBUTIONS_ON_WORDSNETS_6_COLUMNS_Click(object sender, EventArgs e)
{
    // Number of leading columns whose contents are tokenized.
    const int columnsToScan = 6;

    string inputCsvPath;
    // The dialog wraps native resources, so dispose it as soon as the path is known.
    using (var ofd = new System.Windows.Forms.OpenFileDialog())
    {
        ofd.Title = "Select CSV file";
        ofd.Filter = "CSV Files (*.csv)|*.csv";
        if (ofd.ShowDialog() != System.Windows.Forms.DialogResult.OK)
        {
            return; // user cancelled the dialog
        }
        inputCsvPath = ofd.FileName;
    }

    string outputReportPath = inputCsvPath + "_wordnet_token_frequency_report.txt";
    var tokenFrequency = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);
    char[] tokenSeparators = { ' ', '\t' };

    try
    {
        // Stream the file line by line instead of loading it all; Skip(1) drops the header.
        foreach (var line in File.ReadLines(inputCsvPath).Skip(1))
        {
            string[] columns = line.Split(','); // plain comma split; use '\t' here for TSV input
            if (columns.Length < columnsToScan)
            {
                continue;
            }

            for (int i = 0; i < columnsToScan; i++)
            {
                // Underscore-joined terms are broken into individual words.
                var tokens = columns[i]
                    .Replace("_", " ")
                    .Split(tokenSeparators, StringSplitOptions.RemoveEmptyEntries);

                foreach (var token in tokens)
                {
                    // Trim removes whitespace other than ' ' and '\t' that survived the split.
                    string cleanToken = token.Trim();
                    if (cleanToken.Length == 0)
                    {
                        continue;
                    }

                    int count;
                    tokenFrequency.TryGetValue(cleanToken, out count); // count stays 0 when absent
                    tokenFrequency[cleanToken] = count + 1;
                }
            }
        }

        using (var writer = new StreamWriter(outputReportPath))
        {
            writer.WriteLine("Token\tFrequency");
            foreach (var kv in tokenFrequency.OrderByDescending(entry => entry.Value))
            {
                writer.WriteLine($"{kv.Key}\t{kv.Value}");
            }
        }
    }
    catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
    {
        // Report file-access problems (locked file, no write permission, ...) to the user
        // instead of letting them escape the event handler.
        System.Windows.Forms.MessageBox.Show("Token frequency report could not be generated: " + ex.Message);
        return;
    }

    Console.WriteLine("Token frequency report generated: " + outputReportPath);
    System.Windows.Forms.MessageBox.Show("Token frequency report generated: " + outputReportPath);
}// private void button_FREQUENCY_DISTRIBUTIONS_ON_WORDSNETS_6_COLUMNS_Click(object sender, EventArgs e)
// (Web-page residue from the blog post this code was copied from: "No comments:" / "Post a Comment")