NumeRe v1.1.4
NumeRe: Framework für Numerische Rechnungen
plugin_statistics.cpp
Go to the documentation of this file.
1/*****************************************************************************
2 NumeRe: Framework fuer Numerische Rechnungen
3 Copyright (C) 2014 Erik Haenel et al.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17******************************************************************************/
18
19#include <vector>
20
21#include "plugins.hpp"
23#include "maths/student_t.hpp"
24#include "../kernel.hpp"
25
26/*
27 * Functions for calculating the mean and the standard deviation
28 */
29
// Identifier string handed to the "STATS_OUT_PLGNINFO" language string when
// the statistics file is created (see createStatsFile()).
// NOTE(review): presumably the version number of this plugin - confirm.
30const std::string PI_MED = "1.1.2";
31
32
39{
56};
57
58
70static std::vector<std::vector<double>> calcStats(MemoryManager& _data, const std::string& sTable)
71{
72 long long int nLines = _data.getLines(sTable);
73 long long int nCols = _data.getCols(sTable);
74
75 std::vector<std::vector<double>> vStats (STATS_FIELD_COUNT, std::vector<double>());
76
77 // Calculate built-in statistical values (short-cuts)
78 vStats[STATS_AVG] = mu::real(_data.avg(sTable, "cols"));
79 vStats[STATS_STD] = mu::real(_data.std(sTable, "cols"));
80 vStats[STATS_MED] = mu::real(_data.med(sTable, "cols"));
81 vStats[STATS_Q1] = mu::real(_data.pct(sTable, "cols", 0.25));
82 vStats[STATS_Q3] = mu::real(_data.pct(sTable, "cols", 0.75));
83 vStats[STATS_MIN] = mu::real(_data.min(sTable, "cols"));
84 vStats[STATS_MAX] = mu::real(_data.max(sTable, "cols"));
85 vStats[STATS_NUM] = mu::real(_data.num(sTable, "cols"));
86 vStats[STATS_CNT] = mu::real(_data.cnt(sTable, "cols"));
87 vStats[STATS_RMS] = mu::real(_data.norm(sTable, "cols"));
88
89 for (long long int j = 0; j < nCols; j++)
90 {
91 // Many values make no sense if no data
92 // is available
93 if (!vStats[STATS_NUM][j])
94 {
95 vStats[STATS_CONFINT].push_back(NAN);
96 vStats[STATS_SKEW].push_back(NAN);
97 vStats[STATS_EXC].push_back(NAN);
98 vStats[STATS_STDERR].push_back(NAN);
99 vStats[STATS_S_T].push_back(NAN);
100 vStats[STATS_STD][j] = NAN;
101 vStats[STATS_RMS][j] = NAN;
102 continue;
103 }
104
105 vStats[STATS_CONFINT].push_back(0.0);
106 vStats[STATS_SKEW].push_back(0.0);
107 vStats[STATS_EXC].push_back(0.0);
108
109 // Calculate Confidence interval count,
110 // Skewness and Excess
111 for (long long int i = 0; i < nLines; i++)
112 {
113 if (!_data.isValidElement(i, j, sTable))
114 continue;
115
116 if (fabs(_data.getElement(i, j, sTable) - vStats[STATS_AVG][j]) <= vStats[STATS_STD][j])
117 vStats[STATS_CONFINT][j]++;
118
119 vStats[STATS_SKEW][j] += intPower(_data.getElement(i, j, sTable).real() - vStats[STATS_AVG][j], 3);
120 vStats[STATS_EXC][j] += intPower(_data.getElement(i, j, sTable).real() - vStats[STATS_AVG][j], 4);
121 }
122
123 // Finalize the confidence interval count
124 vStats[STATS_CONFINT][j] /= vStats[STATS_NUM][j];
125 vStats[STATS_CONFINT][j] = round(10000.0*vStats[STATS_CONFINT][j]) / 100.0;
126
127 // Finalize Skewness and Excess
128 vStats[STATS_SKEW][j] /= vStats[STATS_NUM][j] * intPower(vStats[STATS_STD][j], 3);
129 vStats[STATS_EXC][j] /= vStats[STATS_NUM][j] * intPower(vStats[STATS_STD][j], 4);
130 vStats[STATS_EXC][j] -= 3.0; // Convert Kurtosis to Excess
131
132 // Calculate 2nd order stats values available
133 // from simple arithmetic operations
134 vStats[STATS_STDERR].push_back(vStats[STATS_STD][j] / sqrt(vStats[STATS_NUM][j]));
135 vStats[STATS_RMS][j] /= sqrt(vStats[STATS_NUM][j]);
136
137 // Use BOOST to calculate the Student-t value for
138 // the current number of freedoms
139 vStats[STATS_S_T].push_back(student_t(vStats[STATS_NUM][j], 0.95));
140 }
141
142 return vStats;
143}
144
145
155static std::string getStatFieldName(int nStatField)
156{
157 switch (nStatField)
158 {
159 case STATS_AVG:
160 return _lang.get("STATS_TYPE_AVG");
161 case STATS_STD:
162 return _lang.get("STATS_TYPE_STD");
163 case STATS_CONFINT:
164 return _lang.get("STATS_TYPE_CONFINT");
165 case STATS_STDERR:
166 return _lang.get("STATS_TYPE_STDERR");
167 case STATS_MED:
168 return _lang.get("STATS_TYPE_MED");
169 case STATS_Q1:
170 return "Q1";
171 case STATS_Q3:
172 return "Q3";
173 case STATS_RMS:
174 return _lang.get("STATS_TYPE_RMS");
175 case STATS_SKEW:
176 return _lang.get("STATS_TYPE_SKEW");
177 case STATS_EXC:
178 return _lang.get("STATS_TYPE_EXCESS");
179 case STATS_MIN:
180 return "min";
181 case STATS_MAX:
182 return "max";
183 case STATS_NUM:
184 return "num";
185 case STATS_CNT:
186 return "cnt";
187 case STATS_S_T:
188 return "s_t";
190 return "";
191 }
192
193 return "";
194}
195
196
211static void createStatsFile(Output& _out, const std::vector<std::vector<double>>& vStats, const std::string& sSavePath, MemoryManager& _data, const std::string& sTable, const Settings& _option)
212{
213 int nLine = _data.getLines(sTable);
214 int nCol = _data.getCols(sTable);
215 int nHeadlines = _data.getHeadlineCount(sTable);
216 const int nPrecision = 4;
217
218 // Create an output string matrix on the heap
219 std::string** sOut = new std::string*[nLine + STATS_FIELD_COUNT+1 + nHeadlines];
220
221 for (int i = 0; i < nLine + STATS_FIELD_COUNT+1 + nHeadlines; i++)
222 {
223 sOut[i] = new std::string[nCol];
224 }
225
226 // Fill the output matrix with the
227 // previously calculated values
228 for (int j = 0; j < nCol; j++)
229 {
230 // Write an empty column, if no values are available
231 if (!vStats[STATS_NUM][j])
232 {
233 sOut[nHeadlines + nLine + 0][j] = "<<SUMBAR>>";
234
235 for (int n = STATS_AVG; n < STATS_FIELD_COUNT; n++)
236 sOut[nHeadlines + nLine + 1 + n][j] = getStatFieldName(n) + ": ---";
237
238 continue;
239 }
240
241 // Add the headlines to the columns
242 std::string sHeadline = _data.getHeadLineElement(j, sTable);
243
244 for (int i = 0; i < nHeadlines; i++)
245 {
246 if (sHeadline.length())
247 {
248 sOut[i][j] = sHeadline.substr(0, sHeadline.find('\n'));
249
250 if (sHeadline.find('\n') != std::string::npos)
251 sHeadline.erase(0, sHeadline.find('\n') + 1);
252 else
253 break;
254 }
255 }
256
257 // Write the table values to the single columns
258 for (int i = 0; i < nLine; i++)
259 {
260 if (!_data.isValidElement(i,j, sTable))
261 {
262 sOut[i + nHeadlines][j] = "---";
263 continue;
264 }
265
266 sOut[i + nHeadlines][j] = toString(_data.getElement(i,j, sTable), _option.getPrecision()); // Kopieren der Matrix in die Ausgabe
267 }
268
269 // Write the calculated stats to the columns
270 sOut[nHeadlines + nLine + 0][j] = "<<SUMBAR>>"; // Schreiben der berechneten Werte in die letzten drei Zeilen der Ausgabe
271
272 for (int n = STATS_AVG; n < STATS_FIELD_COUNT; n++)
273 sOut[nHeadlines + nLine + 1 + n][j] = getStatFieldName(n) + ": " + toString(vStats[n][j], nPrecision);
274 }
275
276 // --> Allgemeine Ausgabe-Info-Parameter setzen <--
277 _out.setFileName(sSavePath);
278 _out.setPluginName(_lang.get("STATS_OUT_PLGNINFO", PI_MED, _data.getDataFileName(sTable)));
279 _out.setPrefix("stats");
280
281 _out.setCompact(false);
282 _out.setCommentLine(_lang.get("STATS_OUT_COMMENTLINE"));
283
284 _out.format(sOut, nCol, nLine + STATS_FIELD_COUNT+1 + nHeadlines, _option, true, nHeadlines);
285
286 for (int i = 0; i < nLine + STATS_FIELD_COUNT+1 + nHeadlines; i++)
287 {
288 delete[] sOut[i];
289 }
290
291 delete[] sOut;
292
293 _out.reset();
294}
295
296
314static void createStatsOutput(Output& _out, const std::vector<std::vector<double>>& vStats, const std::string& sSavePath, MemoryManager& _data, const std::string& sTable, const Settings& _option)
315{
316 int nCol = _data.getCols(sTable);
317 int nHeadlines = _data.getHeadlineCount(sTable);
318 const int nPrecision = 4;
319
320 // Redirect the control, if necessary
321 if (_out.isFile())
322 createStatsFile(_out, vStats, sSavePath, _data, sTable, _option);
323
324 // Create the overview string table
325 // on the heap
326 std::string** sOverview = new std::string*[STATS_FIELD_COUNT + nHeadlines];
327
328 for (int i = 0; i < STATS_FIELD_COUNT+nHeadlines; i++)
329 sOverview[i] = new std::string[nCol+1];
330
331 sOverview[0][0] = " ";
332
333 // Write the calculated statistics to the
334 // string table
335 for (int j = 0; j < nCol; j++)
336 {
337 // Write the table column headlines
338 std::string sHeadline = _data.getHeadLineElement(j, sTable);
339
340 for (int i = 0; i < nHeadlines; i++)
341 {
342 if (sHeadline.length())
343 {
344 sOverview[i][j+1] = sHeadline.substr(0, sHeadline.find('\n'));
345
346 if (sHeadline.find('\n') != std::string::npos)
347 sHeadline.erase(0, sHeadline.find('\n') + 1);
348 else
349 break;
350 }
351 }
352
353 // Write the first column with table row names
354 if (!j)
355 {
356 for (int n = STATS_AVG; n < STATS_FIELD_COUNT; n++)
357 sOverview[nHeadlines + n][j] = getStatFieldName(n) + ":";
358 }
359
360 // Write an empty column, if no values are available
361 if (!vStats[STATS_NUM][j])
362 {
363 for (int n = STATS_AVG; n < STATS_FIELD_COUNT; n++)
364 {
365 if (n == STATS_CONFINT)
366 sOverview[nHeadlines + n][j+1] = "--- %";
367 else
368 sOverview[nHeadlines + n][j+1] = "---";
369 }
370
371 continue;
372 }
373
374 // Write the actual values to the string table
375 for (int n = STATS_AVG; n < STATS_FIELD_COUNT; n++)
376 {
377 if (n == STATS_CONFINT)
378 sOverview[nHeadlines + n][j+1] = toString(vStats[n][j], nPrecision) + " %";
379 else
380 sOverview[nHeadlines + n][j+1] = toString(vStats[n][j], nPrecision);
381 }
382 }
383
384 _out.setCompact(false);
385 _out.setCommentLine(_lang.get("STATS_OUT_COMMENTLINE"));
386
388 make_hline();
389 NumeReKernel::print("NUMERE: " + toSystemCodePage(toUpperCase(_lang.get("STATS_HEADLINE"))));
390 make_hline();
391 _out.format(sOverview, nCol+1, STATS_FIELD_COUNT+nHeadlines, _option, true, nHeadlines);
392 _out.reset();
394 make_hline();
395
396 // --> Speicher wieder freigeben! <--
397 for (int i = 0; i < STATS_FIELD_COUNT+nHeadlines; i++)
398 delete[] sOverview[i];
399
400 delete[] sOverview;
401
402 // --> Output-Instanz wieder zuruecksetzen <--
403 _out.reset();
404
405}
406
407
// Implementation of the stats command.
//
// Calculates the statistics of one table via calcStats(), optionally writes
// them to a target table and prints them (and writes them to a file, if
// requested) via createStatsOutput().
//
// sCmd  - command string; it is searched for the "save"/"export" parameters
//         and for a table name, and passed to the target option evaluation.
// _data - memory manager holding the table to evaluate.
//
// Returns a brace-enclosed, comma-separated list of the quoted statistics
// field names if a target table was written, otherwise an empty string.
//
// NOTE(review): this listing skips its lines 419-421, 434 and 459. `_out`,
// `_option` and `_rootData` are used below without a visible declaration, and
// the two validity checks have lost their statements. Restore those lines
// from the original file before compiling.
417std::string plugin_statistics(std::string& sCmd, MemoryManager& _data)
418{
422
423 Indices _idx;
424
425 std::string sSavePath;
426 std::string sRet;
427
428 // Get the target table, if the user specified one,
429 // otherwise just leave it empty
430 std::string sTarget = evaluateTargetOptionInCommand(sCmd, "", _idx, NumeReKernel::getInstance()->getParser(), _rootData, _option);
431
432 // Ensure that at least some data is available
// NOTE(review): the statement guarded by this check is missing in the
// listing; as printed, the check would guard the following if-block instead.
433 if (!_data.isValid())
435
// "save=..." or "export=...": enable the output status and read the value
// following the parameter as save path. The offsets 4 and 6 are the lengths
// of "save" and "export" (presumably to move past the parameter name -
// confirm against findParameter()).
436 if (findParameter(sCmd, "save", '=') || findParameter(sCmd, "export", '='))
437 {
438 int nPos = 0;
439
440 if (findParameter(sCmd, "save", '='))
441 nPos = findParameter(sCmd, "save", '=')+4;
442 else
443 nPos = findParameter(sCmd, "export", '=')+6;
444
445 _out.setStatus(true);
446 sSavePath = getArgAtPos(sCmd, nPos);
447 }
448
// "save" or "export" without a value: enable the output status and keep the
// save path as it is
449 if (findParameter(sCmd, "save") || findParameter(sCmd, "export"))
450 _out.setStatus(true);
451
// Use the table "data", unless the command contains another table name
452 std::string sDatatable = "data";
453
454 if (_data.matchTableAsParameter(sCmd).length())
455 sDatatable = _data.matchTableAsParameter(sCmd);
456
457 // Ensure that the table is not empty
// NOTE(review): the statement guarded by this check is missing in the
// listing as well.
458 if (_data.isEmpty(sDatatable))
460
461 // Calculate the statistics
462 std::vector<std::vector<double>> vStats = calcStats(_data, sDatatable);
463
464 // Write the statistics to the target table, if a
465 // target table was specified
466 if (sTarget.length())
467 {
// i runs over the statistics fields (target rows), j over the source
// columns (target columns)
468 for (size_t i = 0; i < vStats.size(); i++)
469 {
470 for (size_t j = 0; j < vStats[i].size(); j++)
471 {
// Copy the column headlines once, while the first field is processed
472 if (!i && j < _idx.col.size())
473 _rootData.setHeadLineElement(_idx.col[j], sTarget, _data.getHeadLineElement(j, sDatatable));
474
// Columns without any counted value are not written
475 if (!vStats[STATS_NUM][j])
476 continue;
477
// Write only as far as the target indices reach
478 if (i < _idx.row.size() && j < _idx.col.size())
479 _rootData.writeToTable(_idx.row[i], _idx.col[j], sTarget, vStats[i][j]);
480 }
481 }
482
// Build the return value: {"name1","name2",...}
483 sRet = "{";
484
485 for (int n = STATS_AVG; n < STATS_FIELD_COUNT; n++)
486 {
487 sRet += "\"" + getStatFieldName(n) + "\",";
488 }
489
// Replace the trailing comma with the closing brace
490 sRet.back() = '}';
491 }
492
493 // Create the output for the terminal and the file,
494 // if necessary
495 createStatsOutput(_out, vStats, sSavePath, _data, sDatatable, _option);
496
497 return sRet;
498}
499
500
std::string get(const std::string &sMessage, const std::vector< std::string > &vTokens) const
This member function returns the language string for the passed language identifier and replaces all ...
Definition: language.cpp:292
This class represents the central memory managing instance. It will handle all tables and clusters,...
std::string matchTableAsParameter(const std::string &sExpression, char cFollowing=' ')
std::vector< mu::value_type > med(const std::string &sTable, std::string sDir) const
std::vector< mu::value_type > avg(const std::string &sTable, std::string sDir) const
mu::value_type getElement(int _nLine, int _nCol, const std::string &_sTable) const
int getHeadlineCount(const std::string &_sTable) const
bool isValidElement(long long int _nLine, long long int _nCol, const std::string &_sTable) const
std::vector< mu::value_type > min(const std::string &sTable, std::string sDir) const
std::vector< mu::value_type > norm(const std::string &sTable, std::string sDir) const
std::vector< mu::value_type > pct(const std::string &sTable, std::string sDir, mu::value_type dPct=0.5) const
int getLines(StringView sTable, bool _bFull=false) const
bool isEmpty(const std::string &sTable) const
std::vector< mu::value_type > cnt(const std::string &sTable, std::string sDir) const
std::vector< mu::value_type > num(const std::string &sTable, std::string sDir) const
std::string getHeadLineElement(int _i, const std::string &_sTable) const
std::vector< mu::value_type > max(const std::string &sTable, std::string sDir) const
bool isValid() const
Evaluates, whether there's at least a single non-empty table.
std::vector< mu::value_type > std(const std::string &sTable, std::string sDir) const
void writeToTable(int _nLine, int _nCol, const std::string &_sCache, const mu::value_type &_dData)
bool setHeadLineElement(int _i, const std::string &_sTable, std::string _sHead)
int getCols(StringView sTable, bool _bFull=false) const
std::string getDataFileName(const std::string &sTable) const
This member function will return the file name of the selected table. Will default to the table name.
static NumeReKernel * getInstance()
This static member function returns a a pointer to the singleton instance of the kernel.
Definition: kernel.hpp:221
Output & getOutput()
Definition: kernel.hpp:306
MemoryManager & getMemoryManager()
Definition: kernel.hpp:263
static void print(const std::string &__sLine, bool printingEnabled=true)
This member function appends the passed string as a new output line to the buffer and informs the ter...
Definition: kernel.cpp:2636
static void toggleTableStatus()
Toggles the table writing status, which will reduce the number or events send to the terminal.
Definition: kernel.cpp:3671
Settings & getSettings()
Definition: kernel.hpp:296
void format(std::string **_sMatrix, long long int _nCol, long long int _nLine, const Settings &_option, bool bDontAsk=false, int nHeadLineCount=1)
Definition: output.cpp:403
void setPrefix(std::string _sPrefix)
Definition: output.cpp:897
void setPluginName(std::string _sPluginName)
Definition: output.cpp:204
bool isFile() const
Definition: output.cpp:147
void setStatus(bool bStatus)
Definition: output.cpp:65
void reset()
Definition: output.cpp:75
void setCompact(bool _bCompact)
Definition: output.cpp:141
void setCommentLine(std::string _sCommentLine)
Definition: output.cpp:211
void setFileName(std::string sFile)
Definition: output.cpp:153
This class manages the setting values of the internal (kernel) settings of this application.
Definition: settings.hpp:663
size_t getPrecision() const
Returns the precision for displaying floating numbers in the terminal. This value determines the numb...
Definition: settings.hpp:1000
Common exception class for all exceptions thrown in NumeRe.
Definition: error.hpp:32
@ NO_DATA_AVAILABLE
Definition: error.hpp:160
@ NO_CACHED_DATA
Definition: error.hpp:158
static size_t invalid_position
Definition: error.hpp:235
size_t size() const
This member function returns the size of the indices stored in this class.
Definition: structures.hpp:314
Language _lang
Definition: kernel.cpp:39
std::string toSystemCodePage(std::string)
Converts an internal to an external string. Does nothing currently.
std::complex< double > intPower(const std::complex< double > &, int)
This function calculates the power of a value with the specialization that the exponent is an integer...
Definition: tools.cpp:3640
CONSTCD14 To round(const std::chrono::duration< Rep, Period > &d)
Definition: date.h:1278
std::vector< double > real(const std::vector< value_type > &vVec)
string evaluateTargetOptionInCommand(string &sCmd, const string &sDefaultTarget, Indices &_idx, Parser &_parser, MemoryManager &_data, const Settings &_option)
This function evaluates the "target=TABLE()" expression and creates the target table,...
static void createStatsFile(Output &_out, const std::vector< std::vector< double > > &vStats, const std::string &sSavePath, MemoryManager &_data, const std::string &sTable, const Settings &_option)
This static function will create the output file using the functionalities of the Output class.
static void createStatsOutput(Output &_out, const std::vector< std::vector< double > > &vStats, const std::string &sSavePath, MemoryManager &_data, const std::string &sTable, const Settings &_option)
This static function creates the output table for the terminal. The table will get formatted using th...
static std::vector< std::vector< double > > calcStats(MemoryManager &_data, const std::string &sTable)
This static function calculates the statistical values for all columns in the passed table and return...
std::string plugin_statistics(std::string &sCmd, MemoryManager &_data)
This is the implementation of the stats command.
const std::string PI_MED
static std::string getStatFieldName(int nStatField)
This static function maps the statistical value enumerators to strings used for the tables and the re...
StatsFields
This enumeration defines the available statistical values in the vector returned from calcStats().
@ STATS_RMS
@ STATS_CNT
@ STATS_SKEW
@ STATS_EXC
@ STATS_NUM
@ STATS_Q3
@ STATS_S_T
@ STATS_Q1
@ STATS_AVG
@ STATS_MED
@ STATS_FIELD_COUNT
@ STATS_STD
@ STATS_CONFINT
@ STATS_MIN
@ STATS_STDERR
@ STATS_MAX
int findParameter(const std::string &sCmd, const std::string &sParam, const char cFollowing)
This function searches the passed parameter in the passed command string. If something is found,...
Definition: tools.cpp:113
std::string toUpperCase(const std::string &sLowerCase)
Converts lowercase letters to uppercase ones.
This structure is central for managing the indices of a table or cluster read or write data access....
VectorIndex col
VectorIndex row
std::string toString(int)
Converts an integer to a string without the Settings bloat.
double student_t(int nFreedoms, double dConfidenceInterval)
Calculate the student_t value for the selected degrees of freedoms and the desired confidence interva...
Definition: student_t.cpp:35
string getArgAtPos(const string &sCmd, unsigned int nPos, int extraction)
Extracts a options value at the selected position and applies automatic parsing, if necessary.
Definition: tools.cpp:1598
void make_hline(int nLength=-1)
This function prints a horizontal line to the terminal using either minus or equal signs.
Definition: kernel.cpp:3720