NumeRe v1.1.4
NumeRe: Framework für Numerische Rechnungen
memory.cpp
Go to the documentation of this file.
1/*****************************************************************************
2 NumeRe: Framework fuer Numerische Rechnungen
3 Copyright (C) 2018 Erik Haenel et al.
4
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
17******************************************************************************/
18
19#include <memory>
20#include <gsl/gsl_statistics.h>
21#include <gsl/gsl_sort.h>
22#include <gsl/gsl_cdf.h>
23
24#include "memory.hpp"
25#include "tablecolumnimpl.hpp"
26#include "../../kernel.hpp"
27#include "../io/file.hpp"
28#include "../ui/error.hpp"
29#include "../settings.hpp"
30#include "../utils/tools.hpp"
31#include "../version.h"
32#include "../maths/resampler.h"
33#include "../maths/statslogic.hpp"
34#include "../maths/matdatastructures.hpp"
35
36#define MAX_TABLE_SIZE 1e8
37#define MAX_TABLE_COLS 1e4
38#define DEFAULT_COL_TYPE ValueColumn
39
40
41using namespace std;
42
43
44
49{
50 nCalcLines = -1;
51 bSaveMutex = false;
52 m_meta.save();
53}
54
55
63Memory::Memory(size_t _nCols) : Memory()
64{
65 Allocate(_nCols);
66}
67
68
74{
75 clear();
76}
77
78
89bool Memory::Allocate(size_t _nNCols, bool shrink)
90{
91 if (_nNCols > MAX_TABLE_COLS)
93
94 // We simply resize the number of columns. Note, that
95 // this only affects the column count. The column themselves
96 // are not automatically allocated
97 memArray.resize(std::max(_nNCols, memArray.size()));
98
99 if (shrink)
100 {
101 // Iterate through the columns
102 for (TblColPtr& col : memArray)
103 {
104 // If a column exist, call the shrink method
105 if (col)
106 col->shrink();
107 }
108 }
109
110 return true;
111}
112
113
122{
123 for (size_t j = 0; j < memArray.size(); j++)
124 {
125 if (!memArray[j])
126 memArray[j].reset(new DEFAULT_COL_TYPE);
127
128 if (!memArray[j]->m_sHeadLine.length())
129 memArray[j]->m_sHeadLine = TableColumn::getDefaultColumnHead(j);
130 }
131}
132
133
142{
143 memArray.clear();
144
145 nCalcLines = -1;
146 m_meta.modify();
147 bSaveMutex = false;
148
149 return true;
150}
151
152
161{
162 clear();
163
164 memArray.resize(other.memArray.size());
165
166 #pragma omp parallel for
167 for (size_t i = 0; i < other.memArray.size(); i++)
168 {
169 if (!other.memArray[i])
170 continue;
171
172 switch (other.memArray[i]->m_type)
173 {
175 memArray[i].reset(new ValueColumn);
176 break;
178 memArray[i].reset(new DateTimeColumn);
179 break;
181 memArray[i].reset(new StringColumn);
182 break;
184 memArray[i].reset(new LogicalColumn);
185 break;
187 memArray[i].reset(new CategoricalColumn);
188 break;
189
190 // These labels are only for getting warnings
191 // if new column types are added
196 break;
197 }
198
199 memArray[i]->assign(other.memArray[i].get());
200 }
201
202 m_meta = other.m_meta;
203 nCalcLines = other.nCalcLines;
204 m_meta.modify();
205
206 return *this;
207}
208
209
221bool Memory::resizeMemory(size_t _nLines, size_t _nCols)
222{
223 if (!Allocate(_nCols))
224 return false;
225
226 return true;
227}
228
229
241int Memory::getCols(bool _bFull) const
242{
243 return memArray.size();
244}
245
246
258int Memory::getLines(bool _bFull) const
259{
260 if (memArray.size())
261 {
262 if (nCalcLines != -1)
263 return nCalcLines;
264
265 size_t nReturn = 0;
266
267 for (const TblColPtr& col : memArray)
268 {
269 if (col && col->size() > nReturn)
270 nReturn = col->size();
271 }
272
273 // Cache the number of lines until invalidation
274 // for faster access
275 nCalcLines = nReturn;
276
277 return nReturn;
278 }
279
280 return 0;
281}
282
283
293int Memory::getElemsInColumn(size_t col) const
294{
295 if (memArray.size() > col && memArray[col])
296 return memArray[col]->size();
297
298 return 0;
299}
300
301
312{
313 if (memArray.size() > col && memArray[col])
314 return memArray[col]->getNumFilledElements();
315
316 return 0;
317}
318
319
327size_t Memory::getSize() const
328{
329 size_t bytes = 0;
330
331 for (const TblColPtr& col : memArray)
332 {
333 if (col)
334 bytes += col->getBytes();
335 }
336
337 return bytes + m_meta.comment.length();
338}
339
340
350mu::value_type Memory::readMem(size_t _nLine, size_t _nCol) const
351{
352 if (memArray.size() > _nCol && memArray[_nCol])
353 return memArray[_nCol]->getValue(_nLine);
354
355 return NAN;
356}
357
358
367static mu::value_type nanAvg(const std::vector<mu::value_type>& values)
368{
369 mu::value_type sum = 0.0;
370 double c = 0.0;
371
372 for (mu::value_type val : values)
373 {
374 if (!mu::isnan(val))
375 {
376 sum += val;
377 c++;
378 }
379 }
380
381 if (c)
382 return sum / c;
383
384 return sum;
385}
386
387
398mu::value_type Memory::readMemInterpolated(double _dLine, double _dCol) const
399{
400 if (isnan(_dLine) || isnan(_dCol))
401 return NAN;
402
403 // Find the base index
404 int nBaseLine = intCast(_dLine) + (_dLine < 0 ? -1 : 0);
405 int nBaseCol = intCast(_dCol) + (_dCol < 0 ? -1 : 0);
406
407 // Get the decimal part of the double indices
408 double x = _dLine - nBaseLine;
409 double y = _dCol - nBaseCol;
410
411 // Find the surrounding four entries
412 mu::value_type f00 = readMem(nBaseLine, nBaseCol);
413 mu::value_type f10 = readMem(nBaseLine+1, nBaseCol);
414 mu::value_type f01 = readMem(nBaseLine, nBaseCol+1);
415 mu::value_type f11 = readMem(nBaseLine+1, nBaseCol+1);
416
417 // If all are NAN, return NAN
418 if (mu::isnan(f00) && mu::isnan(f01) && mu::isnan(f10) && mu::isnan(f11))
419 return NAN;
420
421 // Get the average respecting NaNs
422 mu::value_type dNanAvg = nanAvg({f00, f01, f10, f11});
423
424 // Otherwise set NAN to zero
425 f00 = mu::isnan(f00) ? dNanAvg : f00;
426 f10 = mu::isnan(f10) ? dNanAvg : f10;
427 f01 = mu::isnan(f01) ? dNanAvg : f01;
428 f11 = mu::isnan(f11) ? dNanAvg : f11;
429
430 // f(0,0) (1-x) (1-y) + f(1,0) x (1-y) + f(0,1) (1-x) y + f(1,1) x y
431 return f00*(1.0-x)*(1.0-y) + f10*x*(1.0-y) + f01*(1.0-x)*y + f11*x*y;
432}
433
434
444std::vector<mu::value_type> Memory::readMem(const VectorIndex& _vLine, const VectorIndex& _vCol) const
445{
446 std::vector<mu::value_type> vReturn;
447
448 if ((_vLine.size() > 1 && _vCol.size() > 1) || !memArray.size())
449 vReturn.push_back(NAN);
450 else
451 {
452 vReturn.resize(_vLine.size()*_vCol.size(), NAN);
453
454 //#pragma omp parallel for
455 for (size_t j = 0; j < _vCol.size(); j++)
456 {
457 if (_vCol[j] < 0)
458 continue;
459
460 int elems = getElemsInColumn(_vCol[j]);
461
462 if (!elems)
463 continue;
464
465 for (size_t i = 0; i < _vLine.size(); i++)
466 {
467 if (_vLine[i] < 0)
468 continue;
469
470 if (_vLine[i] >= elems)
471 {
472 if (_vLine.isExpanded() && _vLine.isOrdered())
473 break;
474
475 continue;
476 }
477
478 vReturn[j + i * _vCol.size()] = memArray[_vCol[j]]->getValue(_vLine[i]);
479 }
480 }
481 }
482
483 return vReturn;
484}
485
486
497Matrix Memory::readMemAsMatrix(const VectorIndex& _vLine, const VectorIndex& _vCol) const
498{
499 if (!memArray.size())
500 return Matrix(1, 1);
501
502 Matrix mat(_vLine.size(), _vCol.size());
503
504 #pragma omp parallel for
505 for (size_t j = 0; j < _vCol.size(); j++)
506 {
507 if (_vCol[j] < 0 || !memArray[_vCol[j]])
508 continue;
509
510 // Get the complete column as a whole because it seems
511 // to be much faster (VTABLE issues? Cache locality?)
512 std::vector<mu::value_type> vEntries = memArray[_vCol[j]]->getValue(_vLine);
513
514 for (size_t i = 0; i < vEntries.size(); i++)
515 {
516 mat(i, j) = vEntries[i];
517 }
518
519 /*int elems = getElemsInColumn(_vCol[j]);
520
521 if (!elems)
522 continue;
523
524 for (size_t i = 0; i < _vLine.size(); i++)
525 {
526 if (_vLine[i] < 0)
527 continue;
528
529 if (_vLine[i] >= elems)
530 {
531 if (_vLine.isExpanded() && _vLine.isOrdered())
532 break;
533
534 continue;
535 }
536
537 mat(i, j) = memArray[_vCol[j]]->getValue(_vLine[i]);
538 }*/
539 }
540
541 return mat;
542}
543
544
554ValueVector Memory::readMixedMem(const VectorIndex& _vLine, const VectorIndex& _vCol) const
555{
556 ValueVector vReturn;
557
558 if ((_vLine.size() > 1 && _vCol.size() > 1) || !memArray.size())
559 vReturn.push_back("");
560 else
561 {
562 vReturn.resize(_vLine.size()*_vCol.size(), "\"\"");
563
564 //#pragma omp parallel for
565 for (size_t j = 0; j < _vCol.size(); j++)
566 {
567 if (_vCol[j] < 0)
568 continue;
569
570 int elems = getElemsInColumn(_vCol[j]);
571
572 if (!elems)
573 continue;
574
575 for (size_t i = 0; i < _vLine.size(); i++)
576 {
577 if (_vLine[i] < 0)
578 continue;
579
580 if (_vLine[i] >= elems)
581 {
582 if (_vLine.isExpanded() && _vLine.isOrdered())
583 break;
584
585 continue;
586 }
587
588 vReturn[j + i * _vCol.size()] = memArray[_vCol[j]]->getValueAsString(_vLine[i]);
589 }
590 }
591 }
592
593 return vReturn;
594}
595
596
607{
608 ValueVector vReturn;
609
610 if ((_vLine.size() > 1 && _vCol.size() > 1) || !memArray.size())
611 vReturn.push_back("");
612 else
613 {
614 vReturn.resize(_vLine.size()*_vCol.size(), "\"\"");
615
616 //#pragma omp parallel for
617 for (size_t j = 0; j < _vCol.size(); j++)
618 {
619 if (_vCol[j] < 0)
620 continue;
621
622 int elems = getElemsInColumn(_vCol[j]);
623
624 if (!elems)
625 continue;
626
627 for (size_t i = 0; i < _vLine.size(); i++)
628 {
629 if (_vLine[i] < 0)
630 continue;
631
632 if (_vLine[i] >= elems)
633 {
634 if (_vLine.isExpanded() && _vLine.isOrdered())
635 break;
636
637 continue;
638 }
639
640 vReturn[j + i * _vCol.size()] = memArray[_vCol[j]]->getValueAsParserString(_vLine[i]);
641 }
642 }
643 }
644
645 return vReturn;
646}
647
648
658{
660
661 for (size_t i = 0; i < _vCol.size(); i++)
662 {
663 if (_vCol[i] >= 0 && (int)memArray.size() > _vCol[i] && memArray[_vCol[i]])
664 {
665 if (type == TableColumn::TYPE_NONE)
666 type = memArray[_vCol[i]]->m_type;
667 else if (type != memArray[_vCol[i]]->m_type
668 && (type > TableColumn::STRINGLIKE || memArray[_vCol[i]]->m_type > TableColumn::STRINGLIKE))
670 }
671 }
672
673 return type;
674}
675
676
686{
687 ValueVector vRet;
688
689 for (size_t i = 0; i < _vCol.size(); i++)
690 {
691 if (_vCol[i] >= 0 && (int)memArray.size() > _vCol[i] && memArray[_vCol[i]])
692 {
693 if (memArray[_vCol[i]]->m_type == TableColumn::TYPE_CATEGORICAL)
694 {
695 const std::vector<std::string>& vCategories = static_cast<CategoricalColumn*>(memArray[_vCol[i]].get())->getCategories();
696
697 for (size_t c = 0; c < vCategories.size(); c++)
698 {
699 vRet.push_back(vCategories[c]);
700 vRet.push_back(toString(c+1));
701 }
702 }
703 }
704 }
705
706 return vRet;
707}
708
709
723Memory* Memory::extractRange(const VectorIndex& _vLine, const VectorIndex& _vCol) const
724{
725 Memory* _memCopy = new Memory();
726
727 _vLine.setOpenEndIndex(getLines(false)-1);
728 _vCol.setOpenEndIndex(getCols(false)-1);
729
730 _memCopy->Allocate(_vCol.size());
731
732 if (_vCol.size() * _vLine.size() > 10000)
733 {
734 #pragma omp parallel for
735 for (size_t j = 0; j < _vCol.size(); j++)
736 {
737 if (_vCol[j] >= 0 && _vCol[j] < (int)memArray.size() && memArray[_vCol[j]])
738 _memCopy->memArray[j].reset(memArray[_vCol[j]]->copy(_vLine));
739 }
740 }
741 else
742 {
743 for (size_t j = 0; j < _vCol.size(); j++)
744 {
745 if (_vCol[j] >= 0 && _vCol[j] < (int)memArray.size() && memArray[_vCol[j]])
746 _memCopy->memArray[j].reset(memArray[_vCol[j]]->copy(_vLine));
747 }
748 }
749
750 _memCopy->m_meta = m_meta;
751 return _memCopy;
752}
753
754
768void Memory::copyElementsInto(vector<mu::value_type>* vTarget, const VectorIndex& _vLine, const VectorIndex& _vCol) const
769{
770 if ((_vLine.size() > 1 && _vCol.size() > 1) || !memArray.size())
771 vTarget->assign(1, NAN);
772 else
773 {
774 vTarget->assign(_vLine.size()*_vCol.size(), NAN);
775
776 //#pragma omp parallel for
777 for (size_t j = 0; j < _vCol.size(); j++)
778 {
779 if (_vCol[j] < 0)
780 continue;
781
782 int elems = getElemsInColumn(_vCol[j]);
783
784 if (!elems)
785 continue;
786
787 for (size_t i = 0; i < _vLine.size(); i++)
788 {
789 if (_vLine[i] < 0)
790 continue;
791
792 if (_vLine[i] >= elems)
793 {
794 if (_vLine.isExpanded() && _vLine.isOrdered())
795 break;
796
797 continue;
798 }
799
800 (*vTarget)[j + i * _vCol.size()] = memArray[_vCol[j]]->getValue(_vLine[i]);
801 }
802 }
803 }
804}
805
806
817bool Memory::isValidElement(size_t _nLine, size_t _nCol) const
818{
819 if (_nCol < memArray.size() && _nCol >= 0 && memArray[_nCol])
820 return !mu::isnan(memArray[_nCol]->getValue(_nLine));
821
822 return false;
823}
824
825
833bool Memory::isValid() const
834{
835 return getLines();
836}
837
838
848{
849 if (!memArray.size())
850 return true;
851
852 // Shrink each column
853 for (TblColPtr& col : memArray)
854 {
855 if (col)
856 col->shrink();
857
858 if (col && !col->size())
859 col.reset(nullptr);
860 }
861
862 nCalcLines = -1;
863
864 // Remove obsolete columns
865 for (int i = memArray.size()-1; i >= 0; i--)
866 {
867 if (memArray[i])
868 {
869 memArray.resize(i+1);
870 return true;
871 }
872 }
873
874 // if this place is reached, delete everything
875 memArray.clear();
876 return true;
877}
878
879
889{
890 #pragma omp parallel for
891 for (size_t i = 0; i < memArray.size(); i++)
892 {
893 if (memArray[i] && memArray[i]->m_type == TableColumn::TYPE_STRING)
894 {
895 TableColumn* col = memArray[i]->convert();
896
897 // Only valid conversions return a non-zero
898 // pointer
899 if (col && col != memArray[i].get())
900 memArray[i].reset(col);
901 }
902 }
903}
904
905
916bool Memory::convertColumns(const VectorIndex& _vCol, const std::string& _sType)
917{
919
920 if (_type == TableColumn::TYPE_NONE)
921 return false;
922
923 _vCol.setOpenEndIndex(memArray.size()-1);
924
925 bool success = true;
926
927 for (size_t i = 0; i < _vCol.size(); i++)
928 {
929 if (_vCol[i] < 0 || _vCol[i] >= (int)memArray.size())
930 continue;
931
932 if (memArray[_vCol[i]] && memArray[_vCol[i]]->m_type != _type)
933 {
934 TableColumn* col = memArray[_vCol[i]]->convert(_type);
935
936 // Only valid conversions return a non-zero
937 // pointer
938 if (col && col != memArray[_vCol[i]].get())
939 memArray[_vCol[i]].reset(col);
940 else
941 success = false;
942 }
943 }
944
945 // If successful: mark the whole table as modified
946 if (success)
947 m_meta.modify();
948
949 return success;
950}
951
952
963bool Memory::setCategories(const VectorIndex& _vCol, const std::vector<std::string>& vCategories)
964{
965 bool success = true;
966 _vCol.setOpenEndIndex(memArray.size()-1);
967
968 for (size_t i = 0; i < _vCol.size(); i++)
969 {
970 if (_vCol[i] < 0 || _vCol[i] >= (int)memArray.size())
971 continue;
972
973 if (memArray[_vCol[i]])
974 {
975 if (memArray[_vCol[i]]->m_type != TableColumn::TYPE_CATEGORICAL)
976 {
977 TableColumn* col = memArray[_vCol[i]]->convert(TableColumn::TYPE_CATEGORICAL);
978
979 // Only valid conversions return a non-zero
980 // pointer
981 if (col && col != memArray[_vCol[i]].get())
982 {
983 memArray[_vCol[i]].reset(col);
984 static_cast<CategoricalColumn*>(col)->setCategories(vCategories);
985 }
986 else
987 success = false;
988 }
989 else
990 static_cast<CategoricalColumn*>(memArray[_vCol[i]].get())->setCategories(vCategories);
991 }
992 }
993
994 if (success)
995 m_meta.modify();
996
997 return success;
998}
999
1000
1010{
1011 return m_meta.isSaved;
1012}
1013
1014
1025std::string Memory::getHeadLineElement(size_t _i) const
1026{
1027 if (_i >= memArray.size() || !memArray[_i])
1029 else
1030 return memArray[_i]->m_sHeadLine;
1031}
1032
1033
1043vector<string> Memory::getHeadLineElement(const VectorIndex& _vCol) const
1044{
1045 vector<string> vHeadLines;
1046
1047 for (unsigned int i = 0; i < _vCol.size(); i++)
1048 {
1049 if (_vCol[i] < 0)
1050 continue;
1051
1052 vHeadLines.push_back(getHeadLineElement(_vCol[i]));
1053 }
1054
1055 return vHeadLines;
1056}
1057
1058
1068bool Memory::setHeadLineElement(size_t _i, const std::string& _sHead)
1069{
1070 if (_i >= memArray.size())
1071 {
1072 if (!resizeMemory(1, _i + 1))
1073 return false;
1074 }
1075
1076 if (!memArray[_i])
1077 memArray[_i].reset(new DEFAULT_COL_TYPE);
1078
1079 memArray[_i]->m_sHeadLine = _sHead;
1080 m_meta.modify();
1081
1082 return true;
1083}
1084
1085
1094void Memory::writeComment(const std::string& comment)
1095{
1096 m_meta.comment = comment;
1097 m_meta.modify();
1098}
1099
1100
1110{
1111 m_meta = meta;
1112}
1113
1114
1122{
1123 m_meta.modify();
1124 nCalcLines = -1;
1125}
1126
1127
1136size_t Memory::getAppendedZeroes(size_t _i) const
1137{
1138 if (_i < memArray.size() && memArray[_i])
1139 return getLines() - memArray[_i]->size();
1140
1141 return getLines();
1142}
1143
1144
1154{
1155 size_t nHeadlineCount = 1;
1156
1157 // Get the dimensions of the complete headline (i.e. including possible linebreaks)
1158 for (const TblColPtr& col : memArray)
1159 {
1160 // No linebreak? Continue
1161 if (!col || col->m_sHeadLine.find('\n') == std::string::npos)
1162 continue;
1163
1164 size_t nLinebreak = 0;
1165
1166 // Count all linebreaks
1167 for (size_t n = 0; n < col->m_sHeadLine.length() - 2; n++)
1168 {
1169 if (col->m_sHeadLine[n] == '\n')
1170 nLinebreak++;
1171 }
1172
1173 // Save the maximal number
1174 if (nLinebreak + 1 > nHeadlineCount)
1175 nHeadlineCount = nLinebreak + 1;
1176 }
1177
1178 return nHeadlineCount;
1179}
1180
1181
1189std::string Memory::getComment() const
1190{
1191 return m_meta.comment;
1192}
1193
1194
1203{
1204 return m_meta;
1205}
1206
1207
1219void Memory::writeData(int _nLine, int _nCol, const mu::value_type& _dData)
1220{
1221 if (!memArray.size() && mu::isnan(_dData))
1222 return;
1223
1224 if ((int)memArray.size() <= _nCol)
1225 resizeMemory(_nLine+1, _nCol+1);
1226
1228 memArray[_nCol]->setValue(_nLine, _dData);
1229
1230 if (nCalcLines != -1 && (mu::isnan(_dData) || _nLine >= nCalcLines))
1231 nCalcLines = -1;
1232
1233 m_meta.modify();
1234}
1235
1236
1249void Memory::writeDataDirect(int _nLine, int _nCol, const mu::value_type& _dData)
1250{
1252 memArray[_nCol]->setValue(_nLine, _dData);
1253}
1254
1255
1271void Memory::writeDataDirectUnsafe(int _nLine, int _nCol, const mu::value_type& _dData)
1272{
1273 memArray[_nCol]->setValue(_nLine, _dData);
1274}
1275
1276
1287void Memory::writeData(int _nLine, int _nCol, const std::string& sValue)
1288{
1289 if (!memArray.size() && !sValue.length())
1290 return;
1291
1292 if ((int)memArray.size() <= _nCol)
1293 resizeMemory(_nLine+1, _nCol+1);
1294
1296 memArray[_nCol]->setValue(_nLine, sValue);
1297
1298 if (!sValue.length() || _nLine >= nCalcLines)
1299 nCalcLines = -1;
1300
1301 // --> Setze den Zeitstempel auf "jetzt", wenn der Memory eben noch gespeichert war <--
1302 m_meta.modify();
1303}
1304
1305
1318void Memory::writeData(Indices& _idx, mu::value_type* _dData, unsigned int _nNum)
1319{
1320 int nDirection = LINES;
1321
1322 if (_nNum == 1)
1323 {
1324 writeSingletonData(_idx, _dData[0]);
1325 return;
1326 }
1327
1328 bool rewriteColumn = false;
1329
1330 if (_idx.row.front() == 0 && _idx.row.isOpenEnd())
1331 rewriteColumn = true;
1332
1333 _idx.row.setOpenEndIndex(_idx.row.front() + _nNum - 1);
1334 _idx.col.setOpenEndIndex(_idx.col.front() + _nNum - 1);
1335
1336 if (_idx.row.size() > 1)
1337 nDirection = COLS;
1338 else if (_idx.col.size() > 1)
1339 nDirection = LINES;
1340
1341 for (size_t i = 0; i < _idx.row.size(); i++)
1342 {
1343 for (size_t j = 0; j < _idx.col.size(); j++)
1344 {
1345 if (!i && _idx.col[j] >= (int)memArray.size())
1346 resizeMemory(i, _idx.col[j]+1);
1347
1348 if (!i)
1350
1351 if (nDirection == COLS)
1352 {
1353 if (!i
1354 && rewriteColumn
1355 && (memArray[_idx.col[j]]->m_type != TableColumn::TYPE_DATETIME || !mu::isreal(_dData, _nNum)))
1357
1358 if (_nNum > i)
1359 {
1360 memArray[_idx.col[j]]->setValue(_idx.row[i], _dData[i]);
1361
1362 if (nCalcLines != -1 && (nCalcLines <= _idx.row[i] || mu::isnan(_dData[i])))
1363 nCalcLines = -1;
1364 }
1365 }
1366 else
1367 {
1368 if (_nNum > j)
1369 {
1370 memArray[_idx.col[j]]->setValue(_idx.row[i], _dData[j]);
1371
1372 if (nCalcLines != -1 && (nCalcLines <= _idx.row[i] || mu::isnan(_dData[j])))
1373 nCalcLines = -1;
1374 }
1375 }
1376 }
1377 }
1378
1379 m_meta.modify();
1380}
1381
1382
1395{
1396 bool rewriteColumn = false;
1397
1398 if (_idx.row.front() == 0 && _idx.row.isOpenEnd())
1399 rewriteColumn = true;
1400
1401 _idx.row.setOpenEndIndex(std::max(_idx.row.front(), getLines(false)) - 1);
1402 _idx.col.setOpenEndIndex(std::max(_idx.col.front(), getCols(false)) - 1);
1403
1404 for (size_t i = 0; i < _idx.row.size(); i++)
1405 {
1406 for (size_t j = 0; j < _idx.col.size(); j++)
1407 {
1408 if (!i
1409 && rewriteColumn
1410 && (int)memArray.size() > _idx.col[j]
1411 && (_dData.imag() || memArray[_idx.col[j]]->m_type != TableColumn::TYPE_DATETIME))
1413
1414 writeData(_idx.row[i], _idx.col[j], _dData);
1415 }
1416 }
1417}
1418
1419
1431void Memory::writeData(Indices& _idx, const ValueVector& _values)
1432{
1433 int nDirection = LINES;
1434
1435 if (_values.size() == 1)
1436 {
1437 writeSingletonData(_idx, _values.front());
1438 return;
1439 }
1440
1441 bool rewriteColumn = false;
1442
1443 if (_idx.row.front() == 0 && _idx.row.isOpenEnd())
1444 rewriteColumn = true;
1445
1446 _idx.row.setOpenEndIndex(_idx.row.front() + _values.size() - 1);
1447 _idx.col.setOpenEndIndex(_idx.col.front() + _values.size() - 1);
1448
1449 if (_idx.row.size() > 1)
1450 nDirection = COLS;
1451 else if (_idx.col.size() > 1)
1452 nDirection = LINES;
1453
1454 for (size_t i = 0; i < _idx.row.size(); i++)
1455 {
1456 for (size_t j = 0; j < _idx.col.size(); j++)
1457 {
1458 if (nDirection == COLS)
1459 {
1460 if (!i && rewriteColumn && (int)memArray.size() > _idx.col[j])
1462
1463 if (_values.size() > i)
1464 writeData(_idx.row[i], _idx.col[j], _values[i]);
1465 }
1466 else
1467 {
1468 if (_values.size() > j)
1469 writeData(_idx.row[i], _idx.col[j], _values[j]);
1470 }
1471 }
1472 }
1473}
1474
1475
1487void Memory::writeSingletonData(Indices& _idx, const std::string& _sValue)
1488{
1489 bool rewriteColumn = false;
1490
1491 if (_idx.row.front() == 0 && _idx.row.isOpenEnd())
1492 rewriteColumn = true;
1493
1494 _idx.row.setOpenEndIndex(std::max(_idx.row.front(), getLines(false)) - 1);
1495 _idx.col.setOpenEndIndex(std::max(_idx.col.front(), getCols(false)) - 1);
1496
1497 for (size_t i = 0; i < _idx.row.size(); i++)
1498 {
1499 for (size_t j = 0; j < _idx.col.size(); j++)
1500 {
1501 if (!i && rewriteColumn && (int)memArray.size() > _idx.col[j])
1503
1504 writeData(_idx.row[i], _idx.col[j], _sValue);
1505 }
1506 }
1507}
1508
1509
1518void Memory::setSaveStatus(bool _bIsSaved)
1519{
1520 if (_bIsSaved)
1521 m_meta.save();
1522 else
1523 m_meta.modify();
1524}
1525
1526
1534long long int Memory::getLastSaved() const
1535{
1536 return m_meta.lastSavedTime;
1537}
1538
1539
1555vector<int> Memory::sortElements(int i1, int i2, int j1, int j2, const std::string& sSortingExpression)
1556{
1557 if (!memArray.size())
1558 return vector<int>();
1559
1560 bool bError = false;
1561 bool bReturnIndex = false;
1562 bSortCaseInsensitive = findParameter(sSortingExpression, "ignorecase");
1563 int nSign = 1;
1564
1565 i1 = std::max(0, i1);
1566 j1 = std::max(0, j1);
1567
1568 vector<int> vIndex;
1569
1570 // Determine the sorting direction
1571 if (findParameter(sSortingExpression, "desc"))
1572 nSign = -1;
1573
1574 if (i2 == -1)
1575 i2 = i1;
1576
1577 if (j2 == -1)
1578 j2 = j1;
1579
1580 // Prepare the sorting index
1581 for (int i = i1; i <= i2; i++)
1582 vIndex.push_back(i);
1583
1584 // Evaluate, whether an index shall be returned
1585 // (instead of actual reordering the columns)
1586 if (findParameter(sSortingExpression, "index"))
1587 bReturnIndex = true;
1588
1589 // Is a column group selected or do we actually
1590 // sort everything?
1591 if (!findParameter(sSortingExpression, "cols", '=') && !findParameter(sSortingExpression, "c", '='))
1592 {
1593 // Make a copy of the global index for the private threads
1594 std::vector<int> vPrivateIndex = vIndex;
1595
1596 // Sort everything independently (we use vIndex from
1597 // the outside, we therefore must declare it as firstprivate)
1598 #pragma omp parallel for firstprivate(vPrivateIndex)
1599 for (int i = j1; i <= j2; i++)
1600 {
1601 // Change for OpenMP
1602 if (i > j1 && bReturnIndex)
1603 continue;
1604
1605 // Sort the current column
1606 if (!qSort(&vPrivateIndex[0], i2 - i1 + 1, i, 0, i2 - i1, nSign))
1608
1609 // Abort after the first column, if
1610 // an index shall be returned
1611 // Continue is a change for OpenMP
1612 if (bReturnIndex)
1613 {
1614 vIndex = vPrivateIndex;
1615 continue;
1616 }
1617
1618 // Actually reorder the column
1619 reorderColumn(vPrivateIndex, i1, i2, i);
1620
1621 // Reset the sorting index
1622 for (int j = i1; j <= i2; j++)
1623 vPrivateIndex[j-i1] = j;
1624 }
1625 }
1626 else
1627 {
1628 // Sort groups of columns (including
1629 // hierarchical sorting)
1630 string sCols = "";
1631
1632 // Find the column group definition
1633 if (findParameter(sSortingExpression, "cols", '='))
1634 sCols = getArgAtPos(sSortingExpression, findParameter(sSortingExpression, "cols", '=') + 4);
1635 else
1636 sCols = getArgAtPos(sSortingExpression, findParameter(sSortingExpression, "c", '=') + 1);
1637
1638 // As long as the column group definition
1639 // has a length
1640 while (sCols.length())
1641 {
1642 // Get a new column keys instance
1643 ColumnKeys* keys = evaluateKeyList(sCols, j2 - j1 + 1);
1644
1645 // Ensure that we obtained an actual
1646 // instance
1647 if (!keys)
1649
1650 if (keys->nKey[1] == -1)
1651 keys->nKey[1] = keys->nKey[0] + 1;
1652
1653 // Go through the group definition
1654 for (int j = keys->nKey[0]; j < keys->nKey[1]; j++)
1655 {
1656 // Sort the current key list level
1657 // independently
1658 if (!qSort(&vIndex[0], i2 - i1 + 1, j + j1, 0, i2 - i1, nSign))
1659 {
1660 delete keys;
1662 }
1663
1664 // Subkey list: sort the subordinate group
1665 // depending on the higher-level key group
1666 if (keys->subkeys && keys->subkeys->subkeys)
1667 {
1668 if (!sortSubList(&vIndex[0], i2 - i1 + 1, keys, i1, i2, j1, nSign, getCols(false)))
1669 {
1670 delete keys;
1672 }
1673 }
1674
1675 // Break, if the index shall be returned
1676 if (bReturnIndex)
1677 break;
1678
1679 // Actually reorder the current column
1680 reorderColumn(vIndex, i1, i2, j + j1);
1681
1682 // Obtain the subkey list
1683 ColumnKeys* subKeyList = keys->subkeys;
1684
1685 // As long as a subkey list is available
1686 while (subKeyList)
1687 {
1688 if (subKeyList->nKey[1] == -1)
1689 subKeyList->nKey[1] = subKeyList->nKey[0] + 1;
1690
1691 // Reorder the subordinate key list
1692 for (int _j = subKeyList->nKey[0]; _j < subKeyList->nKey[1]; _j++)
1693 reorderColumn(vIndex, i1, i2, _j + j1);
1694
1695 // Find the next subordinate list
1696 subKeyList = subKeyList->subkeys;
1697 }
1698
1699 // Reset the sorting index for the next column
1700 for (int _j = i1; _j <= i2; _j++)
1701 vIndex[_j-i1] = _j;
1702 }
1703
1704 // Free the occupied memory
1705 delete keys;
1706
1707 if (bReturnIndex)
1708 break;
1709 }
1710 }
1711
1712 // Number of lines might have changed
1713 nCalcLines = -1;
1714
1715 // Increment each index value, if the index
1716 // vector shall be returned
1717 if (bReturnIndex)
1718 {
1719 for (int i = 0; i <= i2 - i1; i++)
1720 vIndex[i]++;
1721 }
1722
1723 m_meta.modify();
1724
1725 if (bError || !bReturnIndex)
1726 return vector<int>();
1727
1728 return vIndex;
1729}
1730
1731
1744void Memory::reorderColumn(const VectorIndex& vIndex, int i1, int i2, int j1)
1745{
1746 if ((int)memArray.size() > j1 && memArray[j1])
1747 {
1748 TblColPtr col(memArray[j1]->copy(vIndex));
1749 memArray[j1]->insert(VectorIndex(i1, i2), col.get());
1750 memArray[j1]->shrink();
1751 }
1752}
1753
1754
1767int Memory::compare(int i, int j, int col)
1768{
1769 if (col < (int)memArray.size() && memArray[col])
1770 return memArray[col]->compare(i, j, bSortCaseInsensitive);
1771
1772 return 0;
1773}
1774
1775
1786bool Memory::isValue(int line, int col)
1787{
1788 if (col < (int)memArray.size() && memArray[col])
1789 return memArray[col]->isValid(line);
1790
1791 return false;
1792}
1793
1794
1803NumeRe::Table Memory::extractTable(const string& _sTable, const VectorIndex& lines, const VectorIndex& cols)
1804{
1805 lines.setOpenEndIndex(getLines(false)-1);
1806 cols.setOpenEndIndex(getCols(false)-1);
1807
1808 NumeRe::Table table(lines.size(), cols.size());
1809
1810 table.setName(_sTable);
1811 table.setMetaData(m_meta);
1812
1813 #pragma omp parallel for
1814 for (size_t j = 0; j < cols.size(); j++)
1815 {
1816 if (cols[j] < (int)memArray.size() && memArray[cols[j]])
1817 table.setColumn(j, memArray[cols[j]]->copy(lines));
1818 }
1819
1820 return table;
1821}
1822
1823
1834void Memory::importTable(NumeRe::Table _table, const VectorIndex& lines, const VectorIndex& cols)
1835{
1836 // We construct separate objects because they might be overwritten
1837 deleteBulk(VectorIndex(lines), VectorIndex(cols));
1838
1839 lines.setOpenEndIndex(lines.front() + _table.getLines()-1);
1840 cols.setOpenEndIndex(cols.front() + _table.getCols()-1);
1841
1842 resizeMemory(lines.max()+1, cols.max()+1);
1843 m_meta = _table.getMetaData();
1844
1845 #pragma omp parallel for
1846 for (size_t j = 0; j < _table.getCols(); j++)
1847 {
1848 if (j >= cols.size())
1849 continue;
1850
1851 TableColumn* tabCol = _table.getColumn(j);
1852
1853 if (!tabCol)
1854 continue;
1855
1856 if (!memArray[cols[j]])
1857 {
1858 if (tabCol->m_type == TableColumn::TYPE_VALUE)
1859 memArray[cols[j]].reset(new ValueColumn);
1860 else if (tabCol->m_type == TableColumn::TYPE_DATETIME)
1861 memArray[cols[j]].reset(new DateTimeColumn);
1862 else if (tabCol->m_type == TableColumn::TYPE_STRING)
1863 memArray[cols[j]].reset(new StringColumn);
1864 else if (tabCol->m_type == TableColumn::TYPE_LOGICAL)
1865 memArray[cols[j]].reset(new LogicalColumn);
1866 else if (tabCol->m_type == TableColumn::TYPE_CATEGORICAL)
1867 memArray[cols[j]].reset(new CategoricalColumn);
1868 else
1869 {
1870 NumeReKernel::issueWarning("In Memory::importTable(): TableColumn::ColumnType not implemented.");
1871 continue;
1872 }
1873 }
1874
1875 memArray[cols[j]]->insert(lines, tabCol);
1876 memArray[cols[j]]->m_sHeadLine = tabCol->m_sHeadLine;
1877 }
1878
1879 // Try to convert string- to valuecolumns
1880 convert();
1881 m_meta.modify();
1882}
1883
1884
// Writes the contents of this memory page to a file. The file type
// object is obtained from NumeRe::getFileByType() using the passed
// file name.
//
// _sFileName:  name of the target file, handed to getFileByType()
// sTableName:  table name forwarded to the file object
// nPrecision:  precision forwarded via setTextfilePrecision()
// Returns true once the file object has been written and released.
1897bool Memory::save(string _sFileName, const string& sTableName, unsigned short nPrecision)
1898{
1899 // Get an instance of the desired file type
1900 NumeRe::GenericFile* file = NumeRe::getFileByType(_sFileName);
1901
1902 // Ensure that a file was created
1903 if (!file)
// NOTE(review): listing line 1904 (the statement guarded by the check
// above) is absent from this extract - restore it from the original file.
1905
1906 int lines = getLines(false);
1907 int cols = getCols(false);
1908
1909 // Set the dimensions and the generic information
1910 // in the file
1911 file->setDimensions(lines, cols);
1912 file->setData(&memArray, lines, cols);
1913 file->setTableName(sTableName);
1914 file->setTextfilePrecision(nPrecision);
1915
1916 // If the file type is a NumeRe data file, then
1917 // we can also set the comment associated with
1918 // this memory page
1919 if (file->getExtension() == "ndat")
1920 static_cast<NumeRe::NumeReDataFile*>(file)->setComment(m_meta.comment);
1921
1922 // Try to write the data to the file. This might
1923 // either result in writing errors or the write
1924 // function is not defined for this file type
1925 try
1926 {
1927 if (!file->write())
// NOTE(review): listing line 1928 (the statement executed when write()
// reports failure) is absent from this extract.
1929 }
1930 catch (...)
1931 {
// The file object is a raw owning pointer, so it has to be released
// manually before the exception is passed on to the caller.
1932 delete file;
1933 throw;
1934 }
1935
1936 // Delete the created file instance
1937 delete file;
1938
1939 return true;
1940}
1941
1942
1952void Memory::deleteEntry(int _nLine, int _nCol)
1953{
1954 if ((int)memArray.size() > _nCol && memArray[_nCol])
1955 {
1956 if (memArray[_nCol]->isValid(_nLine))
1957 {
1958 // Delete the element
1959 memArray[_nCol]->deleteElements(VectorIndex(_nLine));
1960 m_meta.modify();
1961
1962 // Evaluate, whether we can remove
1963 // the column from memory
1964 if (!_nLine && !memArray[_nCol]->size())
1965 memArray[_nCol].reset(nullptr);
1966
1967 nCalcLines = -1;
1968 }
1969 }
1970}
1971
1972
1982void Memory::deleteBulk(const VectorIndex& _vLine, const VectorIndex& _vCol)
1983{
1984 if (!memArray.size())
1985 return;
1986
1987 _vLine.setOpenEndIndex(getLines()-1);
1988 _vCol.setOpenEndIndex(getCols()-1);
1989
1990 bool bHasFirstLine = _vLine.min() == 0;
1991
1992 // Delete the selected entries
1993 #pragma omp parallel for
1994 for (size_t j = 0; j < _vCol.size(); j++)
1995 {
1996 if (_vCol[j] >= 0 && _vCol[j] < (int)memArray.size() && memArray[_vCol[j]])
1997 memArray[_vCol[j]]->deleteElements(_vLine);
1998 }
1999
2000 m_meta.modify();
2001
2002 // Remove all invalid elements and columns
2003 if (bHasFirstLine)
2004 shrink();
2005
2006 nCalcLines = -1;
2007}
2008
2009
2021void Memory::calculateStats(const VectorIndex& _vLine, const VectorIndex& _vCol, std::vector<StatsLogic>& operation) const
2022{
2023 constexpr size_t MINTHREADCOUNT = 16;
2024 constexpr size_t MINELEMENTPERCOL = 1000;
2025
2026 // Only apply multiprocessing, if there are really a lot of
2027 // elements to process
2028 if (operation.size() >= MINTHREADCOUNT && _vLine.size() >= MINELEMENTPERCOL)
2029 {
2030 #pragma omp parallel for
2031 for (size_t j = 0; j < _vCol.size(); j++)
2032 {
2033 if (_vCol[j] < 0)
2034 continue;
2035
2036 int elems = getElemsInColumn(_vCol[j]);
2037
2038 if (!elems)
2039 continue;
2040
2041 for (size_t i = 0; i < _vLine.size(); i++)
2042 {
2043 if (_vLine[i] < 0)
2044 continue;
2045
2046 if (_vLine[i] >= elems)
2047 {
2048 if (_vLine.isExpanded() && _vLine.isOrdered())
2049 break;
2050
2051 continue;
2052 }
2053
2054 operation[j](readMem(_vLine[i], _vCol[j]));
2055 }
2056 }
2057 }
2058 else
2059 {
2060 for (size_t j = 0; j < _vCol.size(); j++)
2061 {
2062 if (_vCol[j] < 0)
2063 continue;
2064
2065 int elems = getElemsInColumn(_vCol[j]);
2066
2067 if (!elems)
2068 continue;
2069
2070 for (size_t i = 0; i < _vLine.size(); i++)
2071 {
2072 if (_vLine[i] < 0)
2073 continue;
2074
2075 if (_vLine[i] >= elems)
2076 {
2077 if (_vLine.isExpanded() && _vLine.isOrdered())
2078 break;
2079
2080 continue;
2081 }
2082
2083 operation[j](readMem(_vLine[i], _vCol[j]));
2084 }
2085 }
2086 }
2087}
2088
2089
2099mu::value_type Memory::std(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2100{
2101 if (!memArray.size())
2102 return NAN;
2103
2104 mu::value_type dAvg = avg(_vLine, _vCol);
2105 mu::value_type dStd = 0.0;
2106
2107 int lines = getLines(false);
2108 int cols = getCols(false);
2109
2110 _vLine.setOpenEndIndex(lines-1);
2111 _vCol.setOpenEndIndex(cols-1);
2112
2113 std::vector<StatsLogic> vLogic(_vCol.size(), StatsLogic(StatsLogic::OPERATION_ADDSQSUB, 0.0, dAvg));
2114 calculateStats(_vLine, _vCol, vLogic);
2115
2116 for (const auto& val : vLogic)
2117 dStd += val.m_val;
2118
2119 return sqrt(dStd / (num(_vLine, _vCol) - 1.0));
2120}
2121
2122
2132mu::value_type Memory::avg(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2133{
2134 if (!memArray.size())
2135 return NAN;
2136
2137 return sum(_vLine, _vCol) / num(_vLine, _vCol);
2138}
2139
2140
2150mu::value_type Memory::max(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2151{
2152 if (!memArray.size())
2153 return NAN;
2154
2155 double dMax = NAN;
2156
2157 int lines = getLines(false);
2158 int cols = getCols(false);
2159
2160 _vLine.setOpenEndIndex(lines-1);
2161 _vCol.setOpenEndIndex(cols-1);
2162
2163 std::vector<StatsLogic> vLogic(_vCol.size(), StatsLogic(StatsLogic::OPERATION_MAX, NAN));
2164 calculateStats(_vLine, _vCol, vLogic);
2165
2166 for (const auto& val : vLogic)
2167 {
2168 if (isnan(dMax) || dMax < val.m_val.real())
2169 dMax = val.m_val.real();
2170 }
2171
2172 return dMax;
2173}
2174
2175
2185mu::value_type Memory::min(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2186{
2187 if (!memArray.size())
2188 return NAN;
2189
2190 double dMin = NAN;
2191
2192 int lines = getLines(false);
2193 int cols = getCols(false);
2194
2195 _vLine.setOpenEndIndex(lines-1);
2196 _vCol.setOpenEndIndex(cols-1);
2197
2198 std::vector<StatsLogic> vLogic(_vCol.size(), StatsLogic(StatsLogic::OPERATION_MIN, NAN));
2199 calculateStats(_vLine, _vCol, vLogic);
2200
2201 for (const auto& val : vLogic)
2202 {
2203 if (isnan(dMin) || dMin > val.m_val.real())
2204 dMin = val.m_val.real();
2205 }
2206
2207 return dMin;
2208}
2209
2210
2220mu::value_type Memory::prd(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2221{
2222 if (!memArray.size())
2223 return NAN;
2224
2225 mu::value_type dPrd = 1.0;
2226
2227 int lines = getLines(false);
2228 int cols = getCols(false);
2229
2230 _vLine.setOpenEndIndex(lines-1);
2231 _vCol.setOpenEndIndex(cols-1);
2232
2233 std::vector<StatsLogic> vLogic(_vCol.size(), StatsLogic(StatsLogic::OPERATION_MULT, 1.0));
2234 calculateStats(_vLine, _vCol, vLogic);
2235
2236 for (const auto& val : vLogic)
2237 {
2238 dPrd *= val.m_val;
2239 }
2240
2241 return dPrd;
2242}
2243
2244
2254mu::value_type Memory::sum(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2255{
2256 if (!memArray.size())
2257 return NAN;
2258
2259 mu::value_type dSum = 0.0;
2260
2261 int lines = getLines(false);
2262 int cols = getCols(false);
2263
2264 _vLine.setOpenEndIndex(lines-1);
2265 _vCol.setOpenEndIndex(cols-1);
2266
2267 std::vector<StatsLogic> vLogic(_vCol.size(), StatsLogic(StatsLogic::OPERATION_ADD));
2268 calculateStats(_vLine, _vCol, vLogic);
2269
2270 for (const auto& val : vLogic)
2271 {
2272 dSum += val.m_val;
2273 }
2274
2275 return dSum;
2276}
2277
2278
2288mu::value_type Memory::num(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2289{
2290 if (!memArray.size())
2291 return 0;
2292
2293 int nInvalid = 0;
2294
2295 int lines = getLines(false);
2296 int cols = getCols(false);
2297
2298 _vLine.setOpenEndIndex(lines-1);
2299 _vCol.setOpenEndIndex(cols-1);
2300
2301 for (unsigned int j = 0; j < _vCol.size(); j++)
2302 {
2303 if (_vCol[j] < 0)
2304 continue;
2305
2306 int elems = getElemsInColumn(_vCol[j]);
2307
2308 if (!elems)
2309 {
2310 nInvalid += _vLine.size();
2311 continue;
2312 }
2313
2314 for (unsigned int i = 0; i < _vLine.size(); i++)
2315 {
2316 if (_vLine[i] < 0 || _vLine[i] >= elems || mu::isnan(readMem(_vLine[i], _vCol[j])))
2317 nInvalid++;
2318 }
2319 }
2320
2321 return (_vLine.size() * _vCol.size()) - nInvalid;
2322}
2323
2324
2334mu::value_type Memory::and_func(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2335{
2336 if (!memArray.size())
2337 return 0.0;
2338
2339 int lines = getLines(false);
2340 int cols = getCols(false);
2341
2342 _vLine.setOpenEndIndex(lines-1);
2343 _vCol.setOpenEndIndex(cols-1);
2344
2345 double dRetVal = NAN;
2346
2347 for (unsigned int j = 0; j < _vCol.size(); j++)
2348 {
2349 if (_vCol[j] < 0)
2350 continue;
2351
2352 int elems = getElemsInColumn(_vCol[j]);
2353
2354 if (!elems)
2355 continue;
2356
2357 for (unsigned int i = 0; i < _vLine.size(); i++)
2358 {
2359 if (_vLine[i] < 0)
2360 continue;
2361
2362 if (_vLine[i] >= elems)
2363 {
2364 if (_vLine.isExpanded() && _vLine.isOrdered())
2365 break;
2366
2367 continue;
2368 }
2369
2370 if (isnan(dRetVal))
2371 dRetVal = 1.0;
2372
2373 if (!memArray[j] || !memArray[j]->asBool(i))
2374 return 0.0;
2375 }
2376 }
2377
2378 if (isnan(dRetVal))
2379 return 0.0;
2380
2381 return 1.0;
2382}
2383
2384
2394mu::value_type Memory::or_func(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2395{
2396 if (!memArray.size())
2397 return 0.0;
2398
2399 int lines = getLines(false);
2400 int cols = getCols(false);
2401
2402 _vLine.setOpenEndIndex(lines-1);
2403 _vCol.setOpenEndIndex(cols-1);
2404
2405 for (unsigned int j = 0; j < _vCol.size(); j++)
2406 {
2407 if (_vCol[j] < 0)
2408 continue;
2409
2410 int elems = getElemsInColumn(_vCol[j]);
2411
2412 if (!elems)
2413 continue;
2414
2415 for (unsigned int i = 0; i < _vLine.size(); i++)
2416 {
2417 if (_vLine[i] < 0)
2418 continue;
2419
2420 if (_vLine[i] >= elems)
2421 {
2422 if (_vLine.isExpanded() && _vLine.isOrdered())
2423 break;
2424
2425 continue;
2426 }
2427
2428 if (memArray[j] && memArray[j]->asBool(i))
2429 return 1.0;
2430 }
2431 }
2432
2433 return 0.0;
2434}
2435
2436
2446mu::value_type Memory::xor_func(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2447{
2448 if (!memArray.size())
2449 return 0.0;
2450
2451 int lines = getLines(false);
2452 int cols = getCols(false);
2453
2454 _vLine.setOpenEndIndex(lines-1);
2455 _vCol.setOpenEndIndex(cols-1);
2456
2457 bool isTrue = false;
2458
2459 for (unsigned int j = 0; j < _vCol.size(); j++)
2460 {
2461 if (_vCol[j] < 0)
2462 continue;
2463
2464 int elems = getElemsInColumn(_vCol[j]);
2465
2466 if (!elems)
2467 continue;
2468
2469 for (unsigned int i = 0; i < _vLine.size(); i++)
2470 {
2471 if (_vLine[i] < 0)
2472 continue;
2473
2474 if (_vLine[i] >= elems)
2475 {
2476 if (_vLine.isExpanded() && _vLine.isOrdered())
2477 break;
2478
2479 continue;
2480 }
2481
2482 if (memArray[j] && memArray[j]->asBool(i))
2483 {
2484 if (!isTrue)
2485 isTrue = true;
2486 else
2487 return 0.0;
2488 }
2489 }
2490 }
2491
2492 if (isTrue)
2493 return 1.0;
2494
2495 return 0.0;
2496}
2497
2498
2508mu::value_type Memory::cnt(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2509{
2510 if (!memArray.size())
2511 return 0;
2512
2513 int nInvalid = 0;
2514
2515 int lines = getLines(false);
2516 int cols = getCols(false);
2517
2518 _vLine.setOpenEndIndex(lines-1);
2519 _vCol.setOpenEndIndex(cols-1);
2520
2521 for (unsigned int j = 0; j < _vCol.size(); j++)
2522 {
2523 if (_vCol[j] < 0)
2524 continue;
2525
2526 int elems = getElemsInColumn(_vCol[j]);
2527
2528 if (!elems)
2529 continue;
2530
2531 for (unsigned int i = 0; i < _vLine.size(); i++)
2532 {
2533 if (_vLine[i] < 0 || _vLine[i] >= elems)
2534 nInvalid++;
2535 }
2536 }
2537
2538 return (_vLine.size() * _vCol.size()) - nInvalid;
2539}
2540
2541
2551mu::value_type Memory::norm(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2552{
2553 if (!memArray.size())
2554 return NAN;
2555
2556 mu::value_type dNorm = 0.0;
2557
2558 int lines = getLines(false);
2559 int cols = getCols(false);
2560
2561 _vLine.setOpenEndIndex(lines-1);
2562 _vCol.setOpenEndIndex(cols-1);
2563
2564 std::vector<StatsLogic> vLogic(_vCol.size(), StatsLogic(StatsLogic::OPERATION_ADDSQ));
2565 calculateStats(_vLine, _vCol, vLogic);
2566
2567 for (const auto& val : vLogic)
2568 {
2569 dNorm += val.m_val;
2570 }
2571
2572 return sqrt(dNorm);
2573}
2574
2575
2587mu::value_type Memory::cmp(const VectorIndex& _vLine, const VectorIndex& _vCol, mu::value_type dRef, int _nType) const
2588{
2589 if (!memArray.size())
2590 return NAN;
2591
2592 int lines = getLines(false);
2593 int cols = getCols(false);
2594
2595 _vLine.setOpenEndIndex(lines-1);
2596 _vCol.setOpenEndIndex(cols-1);
2597
2598 enum
2599 {
2600 RETURN_VALUE = 1,
2601 RETURN_LE = 2,
2602 RETURN_GE = 4,
2603 RETURN_FIRST = 8
2604 };
2605
2606 int nType = 0;
2607
2608 double dKeep = dRef.real();
2609 int nKeep = -1;
2610
2611 if (_nType > 0)
2612 nType = RETURN_GE;
2613 else if (_nType < 0)
2614 nType = RETURN_LE;
2615
2616 switch (intCast(fabs(_nType)))
2617 {
2618 case 2:
2619 nType |= RETURN_VALUE;
2620 break;
2621 case 3:
2622 nType |= RETURN_FIRST;
2623 break;
2624 case 4:
2625 nType |= RETURN_FIRST | RETURN_VALUE;
2626 break;
2627 }
2628
2629 for (long long int j = 0; j < _vCol.size(); j++)
2630 {
2631 if (_vCol[j] < 0)
2632 continue;
2633
2634 int elems = getElemsInColumn(_vCol[j]);
2635
2636 if (!elems)
2637 continue;
2638
2639 for (long long int i = 0; i < _vLine.size(); i++)
2640 {
2641 if (_vLine[i] < 0)
2642 continue;
2643
2644 if (_vLine[i] >= elems)
2645 {
2646 if (_vLine.isExpanded() && _vLine.isOrdered())
2647 break;
2648
2649 continue;
2650 }
2651
2652 mu::value_type val = readMem(_vLine[i], _vCol[j]);
2653
2654 if (mu::isnan(val))
2655 continue;
2656
2657 if (val == dRef)
2658 {
2659 if (nType & RETURN_VALUE)
2660 return val;
2661
2662 if (_vLine[0] == _vLine[_vLine.size() - 1])
2663 return _vCol[j] + 1;
2664
2665 return _vLine[i] + 1;
2666 }
2667 else if (nType & RETURN_GE && val.real() > dRef.real())
2668 {
2669 if (nType & RETURN_FIRST)
2670 {
2671 if (nType & RETURN_VALUE)
2672 return val.real();
2673
2674 if (_vLine[0] == _vLine[_vLine.size() - 1])
2675 return _vCol[j] + 1;
2676
2677 return _vLine[i] + 1;
2678 }
2679
2680 if (nKeep == -1 || val.real() < dKeep)
2681 {
2682 dKeep = val.real();
2683 if (_vLine[0] == _vLine[_vLine.size() - 1])
2684 nKeep = _vCol[j];
2685 else
2686 nKeep = _vLine[i];
2687 }
2688 else
2689 continue;
2690 }
2691 else if (nType & RETURN_LE && val.real() < dRef.real())
2692 {
2693 if (nType & RETURN_FIRST)
2694 {
2695 if (nType & RETURN_VALUE)
2696 return val.real();
2697
2698 if (_vLine[0] == _vLine[_vLine.size() - 1])
2699 return _vCol[j] + 1;
2700
2701 return _vLine[i] + 1;
2702 }
2703
2704 if (nKeep == -1 || val.real() > dKeep)
2705 {
2706 dKeep = val.real();
2707 if (_vLine[0] == _vLine[_vLine.size() - 1])
2708 nKeep = _vCol[j];
2709 else
2710 nKeep = _vLine[i];
2711 }
2712 else
2713 continue;
2714 }
2715 }
2716 }
2717
2718 if (nKeep == -1)
2719 return NAN;
2720 else if (nType & RETURN_VALUE)
2721 return dKeep;
2722 else
2723 return nKeep + 1;
2724}
2725
2726
2736mu::value_type Memory::med(const VectorIndex& _vLine, const VectorIndex& _vCol) const
2737{
2738 if (!memArray.size())
2739 return NAN;
2740
2741 int lines = getLines(false);
2742 int cols = getCols(false);
2743
2744 _vLine.setOpenEndIndex(lines-1);
2745 _vCol.setOpenEndIndex(cols-1);
2746
2747 vector<double> vData;
2748
2749 vData.reserve(_vLine.size()*_vCol.size());
2750
2751 for (unsigned int j = 0; j < _vCol.size(); j++)
2752 {
2753 if (_vCol[j] < 0)
2754 continue;
2755
2756 int elems = getElemsInColumn(_vCol[j]);
2757
2758 if (!elems)
2759 continue;
2760
2761 for (unsigned int i = 0; i < _vLine.size(); i++)
2762 {
2763 if (_vLine[i] < 0)
2764 continue;
2765
2766 if (_vLine[i] >= elems)
2767 {
2768 if (_vLine.isExpanded() && _vLine.isOrdered())
2769 break;
2770
2771 continue;
2772 }
2773
2774 mu::value_type val = readMem(_vLine[i], _vCol[j]);
2775
2776 if (!mu::isnan(val))
2777 vData.push_back(val.real());
2778 }
2779 }
2780
2781 if (!vData.size())
2782 return NAN;
2783
2784 size_t nCount = qSortDouble(&vData[0], vData.size());
2785
2786 if (!nCount)
2787 return NAN;
2788
2789 return gsl_stats_median_from_sorted_data(&vData[0], 1, nCount);
2790}
2791
2792
2803mu::value_type Memory::pct(const VectorIndex& _vLine, const VectorIndex& _vCol, mu::value_type dPct) const
2804{
2805 if (!memArray.size())
2806 return NAN;
2807
2808 int lines = getLines(false);
2809 int cols = getCols(false);
2810
2811 _vLine.setOpenEndIndex(lines-1);
2812 _vCol.setOpenEndIndex(cols-1);
2813
2814 vector<double> vData;
2815
2816 vData.reserve(_vLine.size()*_vCol.size());
2817
2818 if (dPct.real() >= 1 || dPct.real() <= 0)
2819 return NAN;
2820
2821 for (unsigned int j = 0; j < _vCol.size(); j++)
2822 {
2823 if (_vCol[j] < 0)
2824 continue;
2825
2826 int elems = getElemsInColumn(_vCol[j]);
2827
2828 if (!elems)
2829 continue;
2830
2831 for (unsigned int i = 0; i < _vLine.size(); i++)
2832 {
2833 if (_vLine[i] < 0)
2834 continue;
2835
2836 if (_vLine[i] >= elems)
2837 {
2838 if (_vLine.isExpanded() && _vLine.isOrdered())
2839 break;
2840
2841 continue;
2842 }
2843
2844 mu::value_type val = readMem(_vLine[i], _vCol[j]);
2845
2846 if (!mu::isnan(val))
2847 vData.push_back(val.real());
2848 }
2849 }
2850
2851 if (!vData.size())
2852 return NAN;
2853
2854
2855 size_t nCount = qSortDouble(&vData[0], vData.size());
2856
2857 if (!nCount)
2858 return NAN;
2859
2860 return gsl_stats_quantile_from_sorted_data(&vData[0], 1, nCount, dPct.real());
2861}
2862
2863
2873std::vector<mu::value_type> Memory::size(const VectorIndex& _vIndex, int dir) const
2874{
2875 if (!memArray.size())
2876 return std::vector<mu::value_type>(2, 0.0);
2877
2878 int lines = getLines(false);
2879 int cols = getCols(false);
2880
2881 _vIndex.setOpenEndIndex(dir & LINES ? lines-1 : cols-1);
2882 int nGridOffset = 2*((dir & GRID) != 0);
2883
2884 // Handle simple things first
2885 if (dir == ALL)
2886 return std::vector<mu::value_type>({lines, cols});
2887 else if (dir == GRID)
2888 return std::vector<mu::value_type>({getFilledElemsInColumn(0), getFilledElemsInColumn(1)});
2889 else if (dir & LINES)
2890 {
2891 // Compute the sizes of the table rows
2892 std::vector<mu::value_type> vSizes;
2893
2894 for (size_t i = 0; i < _vIndex.size(); i++)
2895 {
2896 if (_vIndex[i] < 0 || _vIndex[i] >= lines)
2897 continue;
2898
2899 for (int j = memArray.size()-1; j >= 0; j--)
2900 {
2901 if (memArray[j] && memArray[j]->isValid(_vIndex[i]))
2902 {
2903 vSizes.push_back(j+1 - nGridOffset);
2904 break;
2905 }
2906 }
2907 }
2908
2909 if (!vSizes.size())
2910 vSizes.push_back(NAN);
2911
2912 return vSizes;
2913 }
2914 else if (dir & COLS)
2915 {
2916 // Compute the sizes of the table columns
2917 std::vector<mu::value_type> vSizes;
2918
2919 for (size_t j = 0; j < _vIndex.size(); j++)
2920 {
2921 if (_vIndex[j] < nGridOffset || _vIndex[j] >= cols)
2922 continue;
2923
2924 vSizes.push_back(getElemsInColumn(_vIndex[j]));
2925 }
2926
2927 if (!vSizes.size())
2928 vSizes.push_back(NAN);
2929
2930 return vSizes;
2931 }
2932
2933 return std::vector<mu::value_type>(2, 0.0);
2934}
2935
2936
2946std::vector<mu::value_type> Memory::minpos(const VectorIndex& _vIndex, int dir) const
2947{
2948 if (!memArray.size())
2949 return std::vector<mu::value_type>(1, NAN);
2950
2951 int lines = getLines(false);
2952 int cols = getCols(false);
2953
2954 _vIndex.setOpenEndIndex(dir & COLS ? cols-1 : lines-1);
2955 int nGridOffset = 2*((dir & GRID) != 0);
2956
2957 // If a grid is required, get the grid dimensions
2958 // of this table
2959 if (nGridOffset)
2960 {
2961 std::vector<mu::value_type> vSize = size(VectorIndex(), GRID);
2962 lines = vSize.front().real();
2963 cols = vSize.back().real()+nGridOffset; // compensate the offset
2964 }
2965
2966 // A special case for the columns. We will compute the
2967 // results for ALL and GRID using the results for LINES
2968 if (dir & COLS)
2969 {
2970 std::vector<mu::value_type> vPos;
2971
2972 for (size_t j = 0; j < _vIndex.size(); j++)
2973 {
2974 if (_vIndex[j] < nGridOffset || _vIndex[j] >= cols)
2975 continue;
2976
2977 vPos.push_back(cmp(VectorIndex(0, VectorIndex::OPEN_END), VectorIndex(_vIndex[j]), min(VectorIndex(0, VectorIndex::OPEN_END), VectorIndex(_vIndex[j])), 0));
2978 }
2979
2980 if (!vPos.size())
2981 vPos.push_back(NAN);
2982
2983 return vPos;
2984 }
2985
2986 std::vector<mu::value_type> vPos;
2987 double dMin = NAN;
2988 size_t pos = 0;
2989
2990 // Compute the results for LINES and find as
2991 // well the global minimal value, which will be used
2992 // for GRID and ALL
2993 for (size_t i = 0; i < _vIndex.size(); i++)
2994 {
2995 if (_vIndex[i] < 0 || _vIndex[i] >= lines)
2996 continue;
2997
2998 vPos.push_back(cmp(VectorIndex(_vIndex[i]), VectorIndex(nGridOffset, VectorIndex::OPEN_END), min(VectorIndex(_vIndex[i]), VectorIndex(nGridOffset, VectorIndex::OPEN_END)), 0));
2999
3000 if (isnan(dMin) || dMin > readMem(_vIndex[i], intCast(vPos.back())-1).real())
3001 {
3002 dMin = readMem(_vIndex[i], intCast(vPos.back())-1).real();
3003 pos = i;
3004 }
3005 }
3006
3007 if (!vPos.size())
3008 return std::vector<mu::value_type>(1, NAN);
3009
3010 // Use the global minimal value for ALL and GRID
3011 if (dir == ALL || dir == GRID)
3012 return std::vector<mu::value_type>({_vIndex[pos]+1, vPos[pos]});
3013
3014 return vPos;
3015}
3016
3017
3027std::vector<mu::value_type> Memory::maxpos(const VectorIndex& _vIndex, int dir) const
3028{
3029 if (!memArray.size())
3030 return std::vector<mu::value_type>(1, NAN);
3031
3032 int lines = getLines(false);
3033 int cols = getCols(false);
3034
3035 _vIndex.setOpenEndIndex(dir & COLS ? cols-1 : lines-1);
3036 int nGridOffset = 2*((dir & GRID) != 0);
3037
3038 // If a grid is required, get the grid dimensions
3039 // of this table
3040 if (nGridOffset)
3041 {
3042 std::vector<mu::value_type> vSize = size(VectorIndex(), GRID);
3043 lines = vSize.front().real();
3044 cols = vSize.back().real()+nGridOffset; // compensate the offset
3045 }
3046
3047 // A special case for the columns. We will compute the
3048 // results for ALL and GRID using the results for LINES
3049 if (dir & COLS)
3050 {
3051 std::vector<mu::value_type> vPos;
3052
3053 for (size_t j = 0; j < _vIndex.size(); j++)
3054 {
3055 if (_vIndex[j] < nGridOffset || _vIndex[j] >= cols)
3056 continue;
3057
3058 vPos.push_back(cmp(VectorIndex(0, VectorIndex::OPEN_END), VectorIndex(_vIndex[j]), max(VectorIndex(0, VectorIndex::OPEN_END), VectorIndex(_vIndex[j])), 0));
3059 }
3060
3061 if (!vPos.size())
3062 vPos.push_back(NAN);
3063
3064 return vPos;
3065 }
3066
3067 std::vector<mu::value_type> vPos;
3068 double dMax = NAN;
3069 size_t pos;
3070
3071 // Compute the results for LINES and find as
3072 // well the global maximal value, which will be used
3073 // for GRID and ALL
3074 for (size_t i = 0; i < _vIndex.size(); i++)
3075 {
3076 if (_vIndex[i] < 0 || _vIndex[i] >= lines)
3077 continue;
3078
3079 vPos.push_back(cmp(VectorIndex(_vIndex[i]), VectorIndex(nGridOffset, VectorIndex::OPEN_END), max(VectorIndex(_vIndex[i]), VectorIndex(nGridOffset, VectorIndex::OPEN_END)), 0));
3080
3081 if (isnan(dMax) || dMax < readMem(_vIndex[i], intCast(vPos.back())-1).real())
3082 {
3083 dMax = readMem(_vIndex[i], intCast(vPos.back())-1).real();
3084 pos = i;
3085 }
3086 }
3087
3088 if (!vPos.size())
3089 return std::vector<mu::value_type>(1, NAN);
3090
3091 // Use the global maximal value for ALL and GRID
3092 if (dir == ALL || dir == GRID)
3093 return std::vector<mu::value_type>({_vIndex[pos]+1, vPos[pos]});
3094
3095 return vPos;
3096}
3097
3098
/////////////////////////////////////////////////
/// \brief Checks, whether two floating point
/// values are equal within a relative tolerance
/// of 1e-16. The tolerance is scaled with the
/// smaller magnitude of both values, but never
/// with less than 1.0.
///
/// \param d1 double
/// \param d2 double
/// \return bool  true, if the values are
/// considered equal. NAN is never equal to anything
///
/////////////////////////////////////////////////
static bool closeEnough(double d1, double d2)
{
    // Identical values are always close enough. This also covers
    // equal infinities, whose difference is NAN and would fail
    // the tolerance comparison below
    if (d1 == d2)
        return true;

    const double dTolerance = 1e-16 * std::max(1.0, std::min(std::abs(d1), std::abs(d2)));

    return std::abs(d1 - d2) < dTolerance;
}
3113
3114
3125static bool closeEnough(const mu::value_type& v1, const mu::value_type& v2)
3126{
3127 return closeEnough(v1.real(), v2.real()) && closeEnough(v1.imag(), v2.imag());
3128}
3129
3130
3140std::vector<mu::value_type> Memory::findCols(const std::vector<std::string>& vColNames) const
3141{
3142 std::vector<mu::value_type> vColIndices;
3143
3144 for (const auto& sName : vColNames)
3145 {
3146 for (size_t i = 0; i < memArray.size(); i++)
3147 {
3148 if (memArray[i] && memArray[i]->m_sHeadLine == sName)
3149 vColIndices.push_back(i+1.0);
3150 }
3151 }
3152
3153 if (!vColIndices.size())
3154 vColIndices.push_back(NAN);
3155
3156 return vColIndices;
3157}
3158
3159
3172std::vector<mu::value_type> Memory::countIfEqual(const VectorIndex& _vCols, const std::vector<mu::value_type>& vValues,
3173 const std::vector<std::string>& vStringValues) const
3174{
3175 std::vector<mu::value_type> vCounted;
3176
3177 for (size_t j = 0; j < _vCols.size(); j++)
3178 {
3179 if (_vCols[j] >= (int)memArray.size() || !memArray[_vCols[j]])
3180 continue;
3181
3182 if (vValues.size())
3183 {
3184 for (const auto& val : vValues)
3185 {
3186 size_t count = 0;
3187
3188 for (size_t i = 0; i < memArray[_vCols[j]]->size(); i++)
3189 {
3190 if (closeEnough(memArray[_vCols[j]]->getValue(i), val))
3191 count++;
3192 }
3193
3194 vCounted.push_back(count);
3195 }
3196 }
3197 else
3198 {
3199 for (const auto& sVal : vStringValues)
3200 {
3201 size_t count = 0;
3202
3203 for (size_t i = 0; i < memArray[_vCols[j]]->size(); i++)
3204 {
3205 if (memArray[_vCols[j]]->getValueAsInternalString(i) == sVal)
3206 count++;
3207 }
3208
3209 vCounted.push_back(count);
3210 }
3211 }
3212 }
3213
3214 if (!vCounted.size())
3215 vCounted.push_back(NAN);
3216
3217 return vCounted;
3218}
3219
3220
3233std::vector<mu::value_type> Memory::getIndex(size_t col, const std::vector<mu::value_type>& vValues,
3234 const std::vector<std::string>& vStringValues) const
3235{
3236 std::vector<mu::value_type> vIndex;
3237
3238 if (col >= memArray.size() || !memArray[col])
3239 return std::vector<mu::value_type>(1, NAN);
3240
3241 if (vValues.size())
3242 {
3243 for (const auto& val : vValues)
3244 {
3245 if (vIndex.size())
3246 vIndex.push_back(NAN);
3247
3248 for (size_t i = 0; i < memArray[col]->size(); i++)
3249 {
3250 if (closeEnough(memArray[col]->getValue(i), val))
3251 vIndex.push_back(i+1);
3252 }
3253 }
3254 }
3255 else
3256 {
3257 for (const auto& sVal : vStringValues)
3258 {
3259 if (vIndex.size())
3260 vIndex.push_back(NAN);
3261
3262 for (size_t i = 0; i < memArray[col]->size(); i++)
3263 {
3264 if (memArray[col]->getValueAsInternalString(i) == sVal)
3265 vIndex.push_back(i+1);
3266 }
3267 }
3268 }
3269
3270 if (!vIndex.size())
3271 vIndex.push_back(NAN);
3272
3273 return vIndex;
3274}
3275
3276
// Performs a one-way analysis of variance. The values of column
// colValues are split into groups according to the categories of
// column colCategories; the ratio of the between-group variance and
// the average group variance is compared against the F distribution.
//
// colCategories: zero-based index of a categorical column
// colValues:     zero-based index of the column containing the values
// _vIndex:       selected rows
// significance:  significance level, has to be located in (0, 1)
// Returns an AnovaResult; m_FRatio is NAN, if the arguments are invalid.
3288AnovaResult Memory::getOneWayAnova(size_t colCategories, size_t colValues, const VectorIndex& _vIndex, double significance) const
3289{
3290 // Get indices
// NOTE(review): the first comparison uses '>' so that
// colCategories == memArray.size() reaches memArray[colCategories];
// colValues is not validated at all before it is used below - confirm.
3291 if (colCategories > memArray.size()
3292 || !memArray[colCategories]
3293 || memArray[colCategories]->m_type != TableColumn::TYPE_CATEGORICAL
3294 || significance >= 1.0
3295 || significance <= 0.0)
3296 {
3297 AnovaResult res;
3298 res.m_FRatio = NAN;
3299 return res;
3300 }
3301
// Work on a temporary table: column 0 holds the selected categories,
// column 1 the selected values
3302 _vIndex.setOpenEndIndex(getElemsInColumn(colCategories)-1);
3303 Memory _mem(2);
3304 _mem.memArray[0].reset(memArray[colCategories]->copy(_vIndex));
3305 _mem.memArray[1].reset(memArray[colValues]->copy(_vIndex));
3306
3307 const std::vector<std::string>& vCategories = static_cast<CategoricalColumn*>(_mem.memArray[0].get())->getCategories();
3308
3309 // Copy into different columns
3310 for (const auto& cat : vCategories)
3311 {
3312 std::vector<mu::value_type> catIndex = _mem.getIndex(0, std::vector<mu::value_type>(), std::vector<std::string>(1, cat));
3313
// getIndex() returns a single NAN, if the category does not occur
3314 if (mu::isnan(catIndex.front()))
3315 continue;
3316
// Every occurring category gets its own column (index 2 and above)
3317 _mem.memArray.push_back(TblColPtr(_mem.memArray[1]->copy(VectorIndex(&catIndex[0], catIndex.size(), 0))));
3318 }
3319
3320 // Prepare vectors for each group
3321 std::vector<mu::value_type> vAvg;
3322 std::vector<mu::value_type> vVar;
3323 std::vector<mu::value_type> vNum;
3324
3325 // Get the values for each group
3326 for (size_t j = 2; j < _mem.memArray.size(); j++)
3327 {
3328 vAvg.push_back(_mem.avg(VectorIndex(0, VectorIndex::OPEN_END), VectorIndex(j)));
3329 vNum.push_back(_mem.num(VectorIndex(0, VectorIndex::OPEN_END), VectorIndex(j)));
// The variance is the squared standard deviation of the group
3330 vVar.push_back(intPower(_mem.std(VectorIndex(0, VectorIndex::OPEN_END), VectorIndex(j)), 2));
3331 }
3332
3333 // Calculate the overall values
// NOTE(review): listing lines 3334-3335 are absent from this extract;
// 'overallAvg' and 'overallNum' used below are presumably defined there.
3336 mu::value_type overallVariance;
3337
3338 // Pretend that each group contains n equal measurements
3339 // to calculate the ideal overall variance
3340 for (size_t i = 0; i < vAvg.size(); i++)
3341 {
3342 overallVariance += vNum[i] * intPower(vAvg[i]-overallAvg, 2);
3343 }
3344
3345 // Calculate the average group variance
3346 mu::value_type avgGroupVariance = std::accumulate(vVar.begin(), vVar.end(), mu::value_type()) / (double)vVar.size();
3347
// Degrees of freedom: (number of groups - 1) between the groups and
// (overall number of values - number of groups) within the groups
3348 double overallDOF = vVar.size() - 1.0;
3349 double sumOfGroupDOFs = overallNum.real() - vVar.size();
3350
3351 // Normalize only the ideal overall variance as
3352 // the squared STDEVs are already group-normalized
3353 overallVariance /= overallDOF;
3354 //avgGroupVariance /= (double)vVar.size();
3355
3356 // Sum up all information
3357 AnovaResult res;
3358 res.m_FRatio = overallVariance / avgGroupVariance;
// Critical value: inverse cumulative F distribution at 1 - significance
3359 res.m_significanceVal = gsl_cdf_fdist_Pinv(1.0 - significance, overallDOF, sumOfGroupDOFs);
3360 res.m_significance = significance;
3361 res.m_isSignificant = res.m_FRatio.real() >= res.m_significanceVal.real();
3362 res.m_numCategories = vVar.size();
3363
3364 return res;
3365}
3366
3367
3380mu::value_type Memory::getCovariance(size_t col1, const VectorIndex& _vIndex1, size_t col2, const VectorIndex& _vIndex2) const
3381{
3382 _vIndex1.setOpenEndIndex(getElemsInColumn(col1)-1);
3383 _vIndex2.setOpenEndIndex(getElemsInColumn(col2)-1);
3384
3385 size_t minSize = std::min(_vIndex1.size(), _vIndex2.size());
3386
3387 mu::value_type vAvg1 = avg(_vIndex1.subidx(0, minSize), VectorIndex(col1));
3388 mu::value_type vAvg2 = avg(_vIndex2.subidx(0, minSize), VectorIndex(col2));
3389
3390 mu::value_type vCov = 0.0;
3391
3392 for (size_t i = 0; i < minSize; i++)
3393 {
3394 vCov += (readMem(_vIndex1[i], col1) - vAvg1) * (readMem(_vIndex2[i], col2) - vAvg2);
3395 }
3396
3397 return vCov;
3398}
3399
3400
3413mu::value_type Memory::getPearsonCorr(size_t col1, const VectorIndex& _vIndex1, size_t col2, const VectorIndex& _vIndex2) const
3414{
3415 _vIndex1.setOpenEndIndex(getElemsInColumn(col1)-1);
3416 _vIndex2.setOpenEndIndex(getElemsInColumn(col2)-1);
3417
3418 size_t minSize = std::min(_vIndex1.size(), _vIndex2.size());
3419
3420 return getCovariance(col1, _vIndex1, col2, _vIndex2)
3421 / ((minSize-1.0) * std(_vIndex1.subidx(0, minSize), VectorIndex(col1)) * std(_vIndex2.subidx(0, minSize), VectorIndex(col2)));
3422}
3423
3424
// Prepares the rank-based correlation of two columns: the fractional
// ranks of both selections are written into a temporary two-column
// table.
//
// col1, col2:          zero-based indices of the two columns
// _vIndex1, _vIndex2:  selected rows of the respective column
3437mu::value_type Memory::getSpearmanCorr(size_t col1, const VectorIndex& _vIndex1, size_t col2, const VectorIndex& _vIndex2) const
3438{
3439 _vIndex1.setOpenEndIndex(getElemsInColumn(col1)-1);
3440 _vIndex2.setOpenEndIndex(getElemsInColumn(col2)-1);
3441
// Both selections are truncated to the length of the shorter one
3442 size_t minSize = std::min(_vIndex1.size(), _vIndex2.size());
3443
3444 Memory _mem(2);
3445
// Column 0 receives the ranks of col1, column 1 the ranks of col2
3446 _mem.memArray[0].reset(new ValueColumn(minSize));
3447 _mem.memArray[0]->setValue(VectorIndex(0, minSize), getRank(col1, _vIndex1.subidx(0, minSize), RANK_FRACTIONAL));
3448 _mem.memArray[1].reset(new ValueColumn(minSize));
3449 _mem.memArray[1]->setValue(VectorIndex(0, minSize), getRank(col2, _vIndex2.subidx(0, minSize), RANK_FRACTIONAL));
3450
// NOTE(review): listing line 3451 (the return statement) is absent from
// this extract - restore it from the original file.
3452}
3453
3454
// Appends the ranks for a group of tied elements and for the element
// following the group to vRank, according to the selected strategy.
//
// vRank:       ranks assigned so far; its last element is the rank of
//              the first element of the tied group
// nEqualRanks: number of additional elements tied with the last ranked
//              element; reset to 0 on return
// _strat:      ranking strategy
3465static void evaluateRankingStrategy(std::vector<mu::value_type>& vRank, size_t& nEqualRanks, Memory::RankingStrategy _strat)
3466{
3467 switch (_strat)
3468 {
3469 case Memory::RANK_DENSE:
// All tied elements share the rank, the next element gets rank + 1
3470 vRank.insert(vRank.end(), nEqualRanks, vRank.back());
3471 vRank.push_back(vRank.back()+1.0);
3472 break;
// NOTE(review): listing line 3473 (a case label) is absent from this extract.
// All tied elements share the rank, the next element skips as many
// ranks as there were tied elements
3474 vRank.insert(vRank.end(), nEqualRanks, vRank.back());
3475 vRank.push_back(vRank.back()+(nEqualRanks+1.0));
3476 break;
// NOTE(review): listing line 3477 (a case label) is absent from this extract.
3478 {
// All tied elements get the mean of the ranks they occupy, the next
// element continues behind the group
3479 mu::value_type val = vRank.back();
3480 vRank.pop_back();
3481 vRank.insert(vRank.end(), nEqualRanks+1, val+0.5*nEqualRanks);
3482 vRank.push_back(val+(nEqualRanks+1.0));
3483 break;
3484 }
3485 }
3486
3487 nEqualRanks = 0;
3488}
3489
3490
3501std::vector<mu::value_type> Memory::getRank(size_t col, const VectorIndex& _vIndex, Memory::RankingStrategy _strat) const
3502{
3503 _vIndex.setOpenEndIndex(getElemsInColumn(col)-1);
3504
3505 Memory _mem(1);
3506 _mem.memArray.back().reset(memArray[col]->copy(_vIndex));
3507
3508 std::vector<int> vIndex = _mem.sortElements(0, _mem.getLines(false)-1, 0, -1, "-index");
3509 std::vector<mu::value_type> vRank(1, 1.0);
3510 size_t nEqualRanks = 0;
3511
3512 if (_mem.memArray.back()->m_type < TableColumn::TYPE_CATEGORICAL)
3513 {
3514 for (size_t i = 1; i < vIndex.size(); i++)
3515 {
3516 // Indices are already 1-based and NANs are always at the end
3517 if (mu::isnan(_mem.readMem(vIndex[i]-1, 0)))
3518 vRank.push_back(NAN);
3519 else if (_mem.readMem(vIndex[i]-1, 0) != _mem.readMem(vIndex[i-1]-1, 0))
3520 {
3521 if (nEqualRanks)
3522 evaluateRankingStrategy(vRank, nEqualRanks, _strat);
3523 else
3524 vRank.push_back(vRank.back()+1.0);
3525 }
3526 else
3527 nEqualRanks++;
3528 }
3529 }
3530 else
3531 {
3532 TableColumn* col = _mem.memArray.back().get();
3533
3534 for (size_t i = 1; i < vIndex.size(); i++)
3535 {
3536 // Indices are already 1-based
3537 if (col->getValueAsInternalString(vIndex[i]-1) != col->getValueAsInternalString(vIndex[i-1]-1))
3538 {
3539 if (nEqualRanks)
3540 evaluateRankingStrategy(vRank, nEqualRanks, _strat);
3541 else
3542 vRank.push_back(vRank.back()+1.0);
3543 }
3544 else
3545 nEqualRanks++;
3546 }
3547 }
3548
3549 if (nEqualRanks)
3550 {
3551 evaluateRankingStrategy(vRank, nEqualRanks, _strat);
3552 vRank.pop_back();
3553 }
3554
3555 std::vector<mu::value_type> vRankReordered(vRank);
3556
3557 for (size_t i = 0; i < vIndex.size(); i++)
3558 {
3559 vRankReordered[vIndex[i]-1] = vRank[i];
3560 }
3561
3562 return vRankReordered;
3563}
3564
3565
3575std::vector<mu::value_type> Memory::getZScore(size_t col, const VectorIndex& _vIndex) const
3576{
3577 _vIndex.setOpenEndIndex(getElemsInColumn(col)-1);
3578
3579 std::vector<mu::value_type> vZScore;
3580
3581 mu::value_type avgVal = avg(_vIndex, VectorIndex(col));
3582 mu::value_type stdVal = std(_vIndex, VectorIndex(col));
3583
3584 for (size_t i = 0; i < _vIndex.size(); i++)
3585 {
3586 vZScore.push_back((readMem(_vIndex[i], col) - avgVal) / stdVal);
3587 }
3588
3589 return vZScore;
3590}
3591
3592
3602std::vector<mu::value_type> Memory::getBins(size_t col, size_t nBins) const
3603{
3604 std::vector<mu::value_type> vBins;
3605
3606 // Ensure that we have data
3607 if (memArray.size() <= col || !memArray[col])
3608 {
3609 vBins.resize(!nBins || nBins >= memArray[col]->size() ? 1 : nBins, NAN);
3610 return vBins;
3611 }
3612
3613 // Get the column type
3614 TableColumn::ColumnType type = memArray[col]->m_type;
3615
3616 // Handle different column types differently
3618 {
3619 // We use the categories as bins and ignore user settings
3620 std::vector<std::string> vCategories = static_cast<CategoricalColumn*>(memArray[col].get())->getCategories();
3621 nBins = vCategories.size();
3622 vBins.resize(nBins, 0.0);
3623
3624 for (size_t i = 0; i < memArray[col]->size(); i++)
3625 {
3626 if (memArray[col]->getValue(i).real() > 0.0)
3627 vBins[memArray[col]->getValue(i).real()-1] += 1.0;
3628 }
3629 }
3630 else if (type == TableColumn::TYPE_LOGICAL)
3631 {
3632 // We use the logical values as bins and ignore user settings
3633 nBins = 2;
3634 vBins.resize(nBins, 0.0);
3635
3636 for (size_t i = 0; i < memArray[col]->size(); i++)
3637 {
3638 if (memArray[col]->getValue(i) == 1.0)
3639 vBins[0] += 1.0;
3640 else if (memArray[col]->getValue(i) == 0.0)
3641 vBins[1] += 1.0;
3642 }
3643 }
3644 else if (type == TableColumn::TYPE_STRING)
3645 vBins.resize(!nBins || nBins >= memArray[col]->size() ? 1 : nBins, NAN); // Strings are not binnable
3646 else
3647 {
3648 // Calculate the bins following the (simple) Sturges rule
3649 if (!nBins || nBins >= memArray[col]->size() )
3650 nBins = (int)std::rint(1.0 + 3.3 * std::log10(num(VectorIndex(0, VectorIndex::OPEN_END), VectorIndex(col)).real()));
3651
3652 // Calculate min, max and range of the data. We'll only consider real values
3653 vBins.resize(nBins, 0.0);
3654 double dMin = min(VectorIndex(0, VectorIndex::OPEN_END), VectorIndex(col)).real();
3655 double dMax = max(VectorIndex(0, VectorIndex::OPEN_END), VectorIndex(col)).real();
3656 double dRange = dMax - dMin;
3657
3658 for (size_t i = 0; i < memArray[col]->size(); i++)
3659 {
3660 if (!mu::isnan(memArray[col]->getValue(i)))
3661 vBins[std::min(nBins-1.0, nBins * (memArray[col]->getValue(i).real()-dMin) / dRange)] += 1.0;
3662 }
3663 }
3664
3665 return vBins;
3666}
3667
3668
3680bool Memory::retouch(VectorIndex _vLine, VectorIndex _vCol, AppDir Direction)
3681{
3682 bool bUseAppendedZeroes = false;
3683
3684 if (!memArray.size())
3685 return false;
3686
3687 if (!_vLine.isValid() || !_vCol.isValid())
3688 return false;
3689
3690 // Evaluate the indices
3691 if (_vLine.isOpenEnd())
3692 bUseAppendedZeroes = true;
3693
3694 _vLine.setRange(0, getLines()-1);
3695 _vCol.setRange(0, getCols()-1);
3696
3697 if ((Direction == ALL || Direction == GRID) && _vLine.size() < 4)
3698 Direction = LINES;
3699
3700 if ((Direction == ALL || Direction == GRID) && _vCol.size() < 4)
3701 Direction = COLS;
3702
3703 // Pre-evaluate the axis values in the GRID case
3704 if (Direction == GRID)
3705 {
3706 if (bUseAppendedZeroes)
3707 {
3708 if (!retouch(_vLine, VectorIndex(_vCol[0]), COLS) || !retouch(_vLine, VectorIndex(_vCol[1]), COLS))
3709 return false;
3710 }
3711 else
3712 {
3713 if (!retouch(_vLine, _vCol.subidx(0, 2), COLS))
3714 return false;
3715 }
3716
3717 _vCol = _vCol.subidx(2);
3718 }
3719
3720 // Redirect the control to the specialized member
3721 // functions
3722 if (Direction == ALL || Direction == GRID)
3723 {
3724 _vLine.linearize();
3725 _vCol.linearize();
3726
3727 return retouch2D(_vLine, _vCol);
3728 }
3729 else
3730 return retouch1D(_vLine, _vCol, Direction);
3731}
3732
3733
/////////////////////////////////////////////////
/// \brief Retouches one-dimensional data, i.e. replaces invalid values (NaN)
/// along the lines or along the columns of the selected range.
///
/// For every gap of invalid values the following rules are applied:
/// - gap enclosed by two valid values: linear interpolation between the value
///   in front of the gap and the value behind it,
/// - gap at the beginning of the range: filled with the first valid value
///   behind the gap, but only if that value is not the last element of the
///   range,
/// - gap reaching up to the end of the range (and not starting at its first
///   element): filled with the last valid value in front of the gap.
///
/// \param _vLine     Lines of the range.
/// \param _vCol      Columns of the range.
/// \param Direction  LINES retouches along each line (across the columns),
///                   COLS along each column (across the lines). Any other
///                   value leaves the data untouched.
/// \return Always true. The table is marked as modified only if at least one
///         value was written.
/////////////////////////////////////////////////
3744bool Memory::retouch1D(const VectorIndex& _vLine, const VectorIndex& _vCol, AppDir Direction)
3745{
3746 bool markModified = false;
3747
3748 if (Direction == LINES)
3749 {
3750 for (size_t i = 0; i < _vLine.size(); i++)
3751 {
3752 for (size_t j = 0; j < _vCol.size(); j++)
3753 {
 // j is the first invalid element of a gap
3754 if (mu::isnan(readMem(_vLine[i], _vCol[j])))
3755 {
 // Search for the first valid element _j behind the gap
3756 for (size_t _j = j; _j < _vCol.size(); _j++)
3757 {
3758 if (!mu::isnan(readMem(_vLine[i], _vCol[_j])))
3759 {
3760 if (j)
3761 {
 // Enclosed gap: interpolate linearly between the
 // elements j-1 and _j
3762 for (size_t __j = j; __j < _j; __j++)
3763 {
3764 writeData(_vLine[i],
3765 _vCol[__j],
3766 (readMem(_vLine[i], _vCol[_j]) - readMem(_vLine[i], _vCol[j-1])) / (double)(_j - j) * (double)(__j - j + 1) + readMem(_vLine[i], _vCol[j-1]));
3767 }
3768
3769 markModified = true;
3770 break;
3771 }
3772 else if (_j+1 < _vCol.size())
3773 {
 // Leading gap: copy the first valid value to the front
3774 for (size_t __j = j; __j < _j; __j++)
3775 {
3776 writeData(_vLine[i], _vCol[__j], readMem(_vLine[i], _vCol[_j]));
3777 }
3778
3779 markModified = true;
3780 break;
3781 }
3782 }
3783
 // Trailing gap: no valid value found up to the last element,
 // repeat the last valid value in front of the gap
3784 if (j && _j+1 == _vCol.size() && mu::isnan(readMem(_vLine[i], _vCol[_j])))
3785 {
3786 for (size_t __j = j; __j < _vCol.size(); __j++)
3787 {
3788 writeData(_vLine[i], _vCol[__j], readMem(_vLine[i], _vCol[j-1]));
3789 }
3790
3791 markModified = true;
3792 }
3793 }
3794 }
3795 }
3796 }
3797 }
3798 else if (Direction == COLS)
3799 {
 // Same procedure as above with the roles of lines and columns swapped
3800 for (size_t j = 0; j < _vCol.size(); j++)
3801 {
3802 for (size_t i = 0; i < _vLine.size(); i++)
3803 {
 // i is the first invalid element of a gap
3804 if (mu::isnan(readMem(_vLine[i], _vCol[j])))
3805 {
 // Search for the first valid element _i behind the gap
3806 for (size_t _i = i; _i < _vLine.size(); _i++)
3807 {
3808 if (!mu::isnan(readMem(_vLine[_i], _vCol[j])))
3809 {
3810 if (i)
3811 {
 // Enclosed gap: interpolate linearly between the
 // elements i-1 and _i
3812 for (size_t __i = i; __i < _i; __i++)
3813 {
3814 writeData(_vLine[__i],
3815 _vCol[j],
3816 (readMem(_vLine[_i], _vCol[j]) - readMem(_vLine[i-1], _vCol[j])) / (double)(_i - i) * (double)(__i - i + 1) + readMem(_vLine[i-1], _vCol[j]));
3817 }
3818
3819 markModified = true;
3820 break;
3821 }
3822 else if (_i+1 < _vLine.size())
3823 {
 // Leading gap: copy the first valid value to the front
3824 for (size_t __i = i; __i < _i; __i++)
3825 {
3826 writeData(_vLine[__i], _vCol[j], readMem(_vLine[_i], _vCol[j]));
3827 }
3828
3829 markModified = true;
3830 break;
3831 }
3832 }
3833
 // Trailing gap: no valid value found up to the last element,
 // repeat the last valid value in front of the gap
3834 if (i && _i+1 == _vLine.size() && mu::isnan(readMem(_vLine[_i], _vCol[j])))
3835 {
3836 for (size_t __i = i; __i < _vLine.size(); __i++)
3837 {
3838 writeData(_vLine[__i], _vCol[j], readMem(_vLine[i-1], _vCol[j]));
3839 }
3840
3841 markModified = true;
3842 }
3843 }
3844 }
3845 }
3846 }
3847 }
3848
 // Only flag the table as modified, if something was actually written
3849 if (markModified)
3850 m_meta.modify();
3851
3852 return true;
3853}
3854
3855
/////////////////////////////////////////////////
/// \brief Retouches two-dimensional data: every invalid value (NaN) found in
/// the rectangle spanned by the first and last line/column index is replaced
/// using a NumeRe::RetouchRegion built from the surrounding valid frame.
///
/// The loops run over the contiguous ranges front()..last() of both indices;
/// the caller visible in this file linearizes both indices beforehand.
///
/// \param _vLine  Lines of the range.
/// \param _vCol   Columns of the range.
/// \return Always true. The table is marked as modified only if at least one
///         invalid value was found.
/////////////////////////////////////////////////
3866bool Memory::retouch2D(const VectorIndex& _vLine, const VectorIndex& _vCol)
3867{
3868 bool bMarkModified = false;
3869
3870 for (long long int i = _vLine.front(); i <= _vLine.last(); i++)
3871 {
3872 for (long long int j = _vCol.front(); j <= _vCol.last(); j++)
3873 {
3874 if (mu::isnan(readMem(i, j)))
3875 {
 // Determine the frame around the invalid region and prepare the
 // retouch region for its interior. The third argument is the
 // median of the values within the frame
3876 Boundary _boundary = findValidBoundary(_vLine, _vCol, i, j);
3877 NumeRe::RetouchRegion _region(_boundary.rows-1,
3878 _boundary.cols-1,
3879 med(VectorIndex(_boundary.rf(), _boundary.re()), VectorIndex(_boundary.cf(), _boundary.ce())));
3880
3881 long long int l,r,t,b;
3882
3883 // Find the correct boundary to be used instead of the
3884 // one outside of the range (if one of the indices is on
3885 // any of the four boundaries)
3886 l = _boundary.cf() < _vCol.front() ? _boundary.ce() : _boundary.cf();
3887 r = _boundary.ce() > _vCol.last() ? _boundary.cf() : _boundary.ce();
3888 t = _boundary.rf() < _vLine.front() ? _boundary.re() : _boundary.rf();
3889 b = _boundary.re() > _vLine.last() ? _boundary.rf() : _boundary.re();
3890
 // Hand over the values along the left, right, top and bottom edge
3891 _region.setBoundaries(readMem(VectorIndex(_boundary.rf(), _boundary.re()), VectorIndex(l)),
3892 readMem(VectorIndex(_boundary.rf(), _boundary.re()), VectorIndex(r)),
3893 readMem(VectorIndex(t), VectorIndex(_boundary.cf(), _boundary.ce())),
3894 readMem(VectorIndex(b), VectorIndex(_boundary.cf(), _boundary.ce())));
3895
 // Rewrite every element strictly inside the frame. The last argument
 // is the median of the 3x3 neighbourhood of the current element
3896 for (long long int _n = _boundary.rf()+1; _n < _boundary.re(); _n++)
3897 {
3898 for (long long int _m = _boundary.cf()+1; _m < _boundary.ce(); _m++)
3899 {
3900 writeData(_n, _m,
3901 _region.retouch(_n - _boundary.rf() - 1,
3902 _m - _boundary.cf() - 1,
3903 readMem(_n, _m),
3904 med(VectorIndex(_n-1, _n+1), VectorIndex(_m-1, _m+1))));
3905 }
3906 }
3907
3908 bMarkModified = true;
3909 }
3910 }
3911 }
3912
 // Only flag the table as modified, if something was actually retouched
3913 if (bMarkModified)
3914 m_meta.modify();
3915
3916 return true;
3917}
3918
3919
3930bool Memory::onlyValidValues(const VectorIndex& _vLine, const VectorIndex& _vCol) const
3931{
3932 return num(_vLine, _vCol) == cnt(_vLine, _vCol);
3933}
3934
3935
/////////////////////////////////////////////////
/// \brief Searches for a frame of valid values around the element (i,j).
///
/// The search starts with the frame Boundary(i-1, j-1, 2, 2) and enlarges it
/// by one column or one line at a time, as long as one of its four edges
/// still contains invalid values and that edge has not yet reached the first
/// or last line/column of the passed indices.
/// Presumably rf()/re() and cf()/ce() are the first/last line and column of
/// the frame -- the Boundary type is not part of this listing.
///
/// \param _vLine  Lines limiting the search (only front() and last() are used).
/// \param _vCol   Columns limiting the search (only front() and last() are used).
/// \param i       Line of the invalid element.
/// \param j       Column of the invalid element.
/// \return The enlarged frame. An edge may still contain invalid values, if it
///         was stopped by the limits of the range.
/////////////////////////////////////////////////
3949Boundary Memory::findValidBoundary(const VectorIndex& _vLine, const VectorIndex& _vCol, int i, int j) const
3950{
3951 Boundary _boundary(i-1, j-1, 2, 2);
3952
3953 bool reEvaluateBoundaries = true;
3954
 // Enlarging one edge changes the extent of the neighbouring edges, so
 // all four edges are re-checked until none of them moved anymore
3955 while (reEvaluateBoundaries)
3956 {
3957 reEvaluateBoundaries = false;
3958
 // Left edge: move one column to the left
3959 if (!onlyValidValues(VectorIndex(_boundary.rf(), _boundary.re()), VectorIndex(_boundary.cf())) && _boundary.cf() > _vCol.front())
3960 {
3961 _boundary.m--;
3962 _boundary.cols++;
3963 reEvaluateBoundaries = true;
3964 }
3965
 // Right edge: extend by one column
3966 if (!onlyValidValues(VectorIndex(_boundary.rf(), _boundary.re()), VectorIndex(_boundary.ce())) && _boundary.ce() < _vCol.last())
3967 {
3968 _boundary.cols++;
3969 reEvaluateBoundaries = true;
3970 }
3971
 // Top edge: move one line up
3972 if (!onlyValidValues(VectorIndex(_boundary.rf()), VectorIndex(_boundary.cf(), _boundary.ce())) && _boundary.rf() > _vLine.front())
3973 {
3974 _boundary.n--;
3975 _boundary.rows++;
3976 reEvaluateBoundaries = true;
3977 }
3978
 // Bottom edge: extend by one line
3979 if (!onlyValidValues(VectorIndex(_boundary.re()), VectorIndex(_boundary.cf(), _boundary.ce())) && _boundary.re() < _vLine.last())
3980 {
3981 _boundary.rows++;
3982 reEvaluateBoundaries = true;
3983 }
3984 }
3985
3986 return _boundary;
3987}
3988
3989
/////////////////////////////////////////////////
/// \brief Applies the filter to one window position of one-dimensional data.
///
/// The window starts at position (i,j) of the passed indices and covers
/// sizes.first elements along the smoothing direction. Non-convolution
/// filters write their result for every window element directly to the
/// table. Convolution filters sum up the filter results of the window and
/// push the sum to the filter's buffer; buffered values are written back with
/// a delay, once the buffer holds more than sizes.first/2 values.
///
/// \param _vLine       Line indices of the data.
/// \param _vCol        Column indices of the data.
/// \param i            Position of the window start within _vLine.
/// \param j            Position of the window start within _vCol.
/// \param _filter      Filter to apply (must not be null, it is dereferenced
///                     unconditionally).
/// \param smoothLines  true: the window extends along the columns of a line,
///                     false: along the lines of a column.
/////////////////////////////////////////////////
4004void Memory::smoothingWindow1D(const VectorIndex& _vLine, const VectorIndex& _vCol, size_t i, size_t j, NumeRe::Filter* _filter, bool smoothLines)
4005{
4006 auto sizes = _filter->getWindowSize();
4007
4008 mu::value_type sum = 0.0;
4009 NumeRe::FilterBuffer& filterBuffer = _filter->getBuffer();
4010
4011 // Apply the filter to the data
 // (n advances the column position if smoothLines is set, otherwise the
 // line position)
4012 for (size_t n = 0; n < sizes.first; n++)
4013 {
4014 if (!_filter->isConvolution())
4015 writeData(_vLine[i+n*(!smoothLines)], _vCol[j+n*smoothLines], _filter->apply(n, 0, readMem(_vLine[i+n*(!smoothLines)], _vCol[j+n*smoothLines])));
4016 else
4017 sum += _filter->apply(n, 0, readMem(_vLine[i+n*(!smoothLines)], _vCol[j+n*smoothLines]));
4018 }
4019
4020 // If the filter is a convolution, store the new value here
4021 if (_filter->isConvolution())
4022 filterBuffer.push(sum);
4023
4024 // If enough elements are stored in the buffer
4025 // remove the first one
4026 if (filterBuffer.size() > sizes.first/2)
4027 {
4028 // Writes the element to the first position of the window
4029 writeData(_vLine[i], _vCol[j], filterBuffer.front());
4030 filterBuffer.pop();
4031 }
4032
4033 // Is this the last point? Then extract all remaining points from the
4034 // buffer
 // (they are written to the positions following the current one)
4035 if (smoothLines && _vCol.size()-sizes.first-1 == j)
4036 {
4037 while (!filterBuffer.empty())
4038 {
4039 j++;
4040 writeData(_vLine[i], _vCol[j], filterBuffer.front());
4041 filterBuffer.pop();
4042 }
4043 }
4044 else if (!smoothLines && _vLine.size()-sizes.first-1 == i)
4045 {
4046 while (!filterBuffer.empty())
4047 {
4048 i++;
4049 writeData(_vLine[i], _vCol[j], filterBuffer.front());
4050 filterBuffer.pop();
4051 }
4052 }
4053}
4054
4055
/////////////////////////////////////////////////
/// \brief Applies the filter to one window position of two-dimensional data.
///
/// The window starts at position (i,j) of the passed indices and covers
/// sizes.first x sizes.second elements. Non-convolution filters write their
/// result for every window element directly to the table. Convolution filters
/// sum up the filter results of the window and collect the sums row by row in
/// the filter's 2D buffer; a buffered row is written back with a delay, once
/// the buffer holds more than sizes.first/2+1 rows.
///
/// \param _vLine   Line indices of the data.
/// \param _vCol    Column indices of the data.
/// \param i        Position of the window start within _vLine.
/// \param j        Position of the window start within _vCol.
/// \param _filter  Filter to apply (must not be null, it is dereferenced
///                 unconditionally).
/////////////////////////////////////////////////
4069void Memory::smoothingWindow2D(const VectorIndex& _vLine, const VectorIndex& _vCol, size_t i, size_t j, NumeRe::Filter* _filter)
4070{
4071 auto sizes = _filter->getWindowSize();
4072 NumeRe::FilterBuffer2D& filterBuffer = _filter->get2DBuffer();
4073
4074 mu::value_type sum = 0.0;
4075
4076 // Apply the filter to the data
4077 for (size_t n = 0; n < sizes.first; n++)
4078 {
4079 for (size_t m = 0; m < sizes.second; m++)
4080 {
4081 if (!_filter->isConvolution())
4082 writeData(_vLine[i+n], _vCol[j+m], _filter->apply(n, m, readMem(_vLine[i+n], _vCol[j+m])));
4083 else
4084 sum += _filter->apply(n, m, readMem(_vLine[i+n], _vCol[j+m]));
4085 }
4086 }
4087
4088 // If the filter is a convolution, store the new value here
4089 if (_filter->isConvolution())
4090 {
 // j == 1 is the first window position of a row in the caller's loop
 // visible in this file: start a new buffer row
4091 if (j == 1)
4092 filterBuffer.push(std::vector<mu::value_type>());
4093
4094 filterBuffer.back().push_back(sum);
4095 }
4096
4097 // If enough elements are stored in the buffer
4098 // remove the first row
4099 if (filterBuffer.size() > sizes.first/2+1)
4100 {
4101 // Write the finished row
 // (to the line in front of the current window start)
4102 for (size_t k = 0; k < filterBuffer.front().size(); k++)
4103 writeData(_vLine[i-1], _vCol[k+sizes.second/2+1], filterBuffer.front()[k]);
4104
4105 filterBuffer.pop();
4106 }
4107
4108 // Is this the last point? Then extract all remaining points from the
4109 // buffer
 // (one buffered row per line, starting with the current line)
4110 if (_vLine.size()-sizes.first-1 == i && _vCol.size()-sizes.second-1 == j)
4111 {
4112 while (!filterBuffer.empty())
4113 {
4114
4115 for (size_t k = 0; k < filterBuffer.front().size(); k++)
4116 writeData(_vLine[i], _vCol[k+sizes.second/2+1], filterBuffer.front()[k]);
4117
4118 i++;
4119 filterBuffer.pop();
4120 }
4121 }
4122}
4123
4124
/////////////////////////////////////////////////
/// \brief Smooths the selected range of the table with the filter described
/// by the passed settings.
///
/// The indices are validated and clamped to the table, the effective
/// application direction is selected and the index vectors are padded, so
/// that the filter window can be applied to the border elements. The filter
/// is then moved over the data using smoothingWindow1D() (LINES, COLS) or
/// smoothingWindow2D() (ALL, GRID). In the two-dimensional case the range is
/// retouched first. In the GRID case the first two columns (axis values) are
/// smoothed separately along the columns.
///
/// \param _vLine     Lines to smooth.
/// \param _vCol      Columns to smooth.
/// \param _settings  Filter settings. row/col are adapted to the direction and
///                   to the sizes reported by the created filter; a NaN alpha
///                   is replaced by 1.0.
/// \param Direction  Requested application direction. ALL and GRID fall back
///                   to LINES if fewer than four lines are selected, otherwise
///                   to COLS if fewer than four columns are selected.
/// \return true after smoothing; false only if smoothing the axis columns in
///         GRID mode reported false.
/// \throws SyntaxError (INVALID_INDEX) if one of the indices is invalid.
///
/// NOTE(review): the embedded listing numbers skip 4144 and 4166, i.e. the
/// statements guarded by the empty-table check and by the filter-order check
/// are missing from this extract. As printed here, each of these conditions
/// would guard the statement following it, which is certainly not intended --
/// confirm against the original file.
/////////////////////////////////////////////////
4138bool Memory::smooth(VectorIndex _vLine, VectorIndex _vCol, NumeRe::FilterSettings _settings, AppDir Direction)
4139{
4140 bool bUseAppendedZeroes = false;
4141
4142 // Avoid the border cases
 // NOTE(review): the guarded statement (listing line 4144) is missing here
4143 if (!memArray.size())
4145
4146 if (!_vLine.isValid() || !_vCol.isValid())
4147 throw SyntaxError(SyntaxError::INVALID_INDEX, "smooth", SyntaxError::invalid_position, _vLine.to_string() + ", " + _vCol.to_string());
4148
4149 // Evaluate the indices
 // (has to be queried before the range is forced)
4150 if (_vLine.isOpenEnd())
4151 bUseAppendedZeroes = true;
4152
4153 // Force the index ranges
4154 _vLine.setRange(0, getLines()-1);
4155 _vCol.setRange(0, getCols()-1);
4156
4157 // Change the predefined application directions, if it's needed
4158 if ((Direction == ALL || Direction == GRID) && _vLine.size() < 4)
4159 Direction = LINES;
4160
4161 if ((Direction == ALL || Direction == GRID) && _vCol.size() < 4)
4162 Direction = COLS;
4163
4164 // Check the order
 // (the window must be smaller than the table extent in the smoothed direction)
 // NOTE(review): the guarded statement (listing line 4166) is missing here
4165 if ((_settings.row >= (size_t)getLines() && Direction == COLS) || (_settings.col >= (size_t)getCols() && Direction == LINES) || ((_settings.row >= (size_t)getLines() || _settings.col >= (size_t)getCols()) && (Direction == ALL || Direction == GRID)))
4167
4168
4169 // If the application direction is equal to GRID, then the first two columns
4170 // should be evaluated separately, because they contain the axis values
4171 if (Direction == GRID)
4172 {
4173 // Will never return false
4174 if (bUseAppendedZeroes)
4175 {
4176 if (!smooth(_vLine, VectorIndex(_vCol[0]), _settings, COLS) || !smooth(_vLine, VectorIndex(_vCol[1]), _settings, COLS))
4177 return false;
4178 }
4179 else
4180 {
4181 if (!smooth(_vLine, _vCol.subidx(0, 2), _settings, COLS))
4182 return false;
4183 }
4184
 // The remaining columns are the actual data grid
4185 _vCol = _vCol.subidx(2);
4186 }
4187
4188 // The first job is to simply remove invalid values and then smooth the
4189 // framing points of the data section
4190 if (Direction == ALL || Direction == GRID)
4191 {
4192 // Retouch everything
4193 Memory::retouch(_vLine, _vCol, ALL);
4194
4195 //Memory::smooth(_vLine, VectorIndex(_vCol.front()), _settings, COLS);
4196 //Memory::smooth(_vLine, VectorIndex(_vCol.last()), _settings, COLS);
4197 //Memory::smooth(VectorIndex(_vLine.front()), _vCol, _settings, LINES);
4198 //Memory::smooth(VectorIndex(_vLine.last()), _vCol, _settings, LINES);
4199
 // If only one of the two window sizes differs from 1, use it for both
 // dimensions
4200 if (_settings.row == 1u && _settings.col != 1u)
4201 _settings.row = _settings.col;
4202 else if (_settings.row != 1u && _settings.col == 1u)
4203 _settings.col = _settings.row;
4204 }
4205 else
4206 {
 // One-dimensional case: the larger size becomes the window size
4207 _settings.row = std::max(_settings.row, _settings.col);
4208 _settings.col = 1u;
4209 }
4210
4211 if (isnan(_settings.alpha))
4212 _settings.alpha = 1.0;
4213
4214 // Apply the actual smoothing of the data
4215 if (Direction == LINES)
4216 {
4217 // Create a filter from the filter settings
4218 std::unique_ptr<NumeRe::Filter> _filterPtr(NumeRe::createFilter(_settings));
4219
4220 // Update the sizes, because they might be
4221 // altered by the filter constructor
4222 auto sizes = _filterPtr->getWindowSize();
4223 _settings.row = sizes.first;
4224
4225 // Pad the beginning and the end of the vector with multiple copies
 // of the first and the last column index, respectively
4226 _vCol.prepend(vector<int>(_settings.row/2+1, _vCol.front()));
4227 _vCol.append(vector<int>(_settings.row/2+1, _vCol.last()));
4228
4229 // Smooth the lines
4230 for (size_t i = 0; i < _vLine.size(); i++)
4231 {
4232 for (size_t j = 1; j < _vCol.size() - _settings.row; j++)
4233 {
4234 smoothingWindow1D(_vLine, _vCol, i, j, _filterPtr.get(), true);
4235 }
4236 }
4237 }
4238 else if (Direction == COLS)
4239 {
4240 // Create a filter from the settings
4241 std::unique_ptr<NumeRe::Filter> _filterPtr(NumeRe::createFilter(_settings));
4242
4243 // Update the sizes, because they might be
4244 // altered by the filter constructor
4245 auto sizes = _filterPtr->getWindowSize();
4246 _settings.row = sizes.first;
4247
4248 // Pad the beginning and end of the vector with multiple copies
 // of the first and the last line index, respectively
4249 _vLine.prepend(vector<int>(_settings.row/2+1, _vLine.front()));
4250 _vLine.append(vector<int>(_settings.row/2+1, _vLine.last()));
4251
4252 // Smooth the columns
4253 for (size_t j = 0; j < _vCol.size(); j++)
4254 {
4255 for (size_t i = 1; i < _vLine.size() - _settings.row; i++)
4256 {
4257 smoothingWindow1D(_vLine, _vCol, i, j, _filterPtr.get(), false);
4258 }
4259 }
4260 }
4261 else if ((Direction == ALL || Direction == GRID) && _vLine.size() > 2 && _vCol.size() > 2)
4262 {
4263 // Create a filter from the settings
4264 std::unique_ptr<NumeRe::Filter> _filterPtr(NumeRe::createFilter(_settings));
4265
4266 // Update the sizes, because they might be
4267 // altered by the filter constructor
4268 auto sizes = _filterPtr.get()->getWindowSize();
4269 _settings.row = sizes.first;
4270 _settings.col = sizes.second;
4271
4272 // Pad the beginning and end of both vectors
4273 // with a mirrored copy of themselves
4274 std::vector<int> vMirror = _vLine.subidx(1, _settings.row/2+1).getVector();
4275 _vLine.prepend(vector<int>(vMirror.rbegin(), vMirror.rend()));
4276
4277 vMirror = _vLine.subidx(_vLine.size() - _settings.row/2-2, _settings.row/2+1).getVector();
4278 _vLine.append(vector<int>(vMirror.rbegin(), vMirror.rend()));
4279
4280 vMirror = _vCol.subidx(1, _settings.col/2+1).getVector();
4281 _vCol.prepend(vector<int>(vMirror.rbegin(), vMirror.rend()));
4282
 // NOTE(review): the length argument uses _settings.row, whereas the three
 // other mirror operations use the size of their own dimension. This looks
 // like a copy-and-paste slip for _settings.col/2+1 and matters whenever
 // the row and column window sizes differ -- confirm.
4283 vMirror = _vCol.subidx(_vCol.size() - _settings.col/2-2, _settings.row/2+1).getVector();
4284 _vCol.append(vector<int>(vMirror.rbegin(), vMirror.rend()));
4285
4286 // Smooth the data in two dimensions, if that is reasonable
4287 // Go through every point
4288 for (size_t i = 1; i < _vLine.size() - _settings.row; i++)
4289 {
4290 for (size_t j = 1; j < _vCol.size() - _settings.col; j++)
4291 {
4292 smoothingWindow2D(_vLine, _vCol, i, j, _filterPtr.get());
4293 }
4294 }
4295 }
4296
 // The table is flagged as modified unconditionally
4297 m_meta.modify();
4298 return true;
4299}
4300
4301
/////////////////////////////////////////////////
/// \brief Resamples the selected range of the table to a new number of
/// samples using a Resampler instance.
///
/// The indices are validated and clamped to the table, the effective
/// application direction is selected, the table is enlarged if the result
/// needs more lines or columns than the selected range, and the data is passed
/// line by line through the resampler. Only the real part of the values is
/// resampled. Lines and columns of the range which are left over after
/// downsampling are deleted. In the GRID case the first two columns (axis
/// values) are resampled separately along the columns.
///
/// \param _vLine     Lines to resample (linearized internally).
/// \param _vCol      Columns to resample.
/// \param samples    Target sizes: first is used as the number of lines,
///                   second as the number of columns.
/// \param Direction  Requested application direction. ALL and GRID fall back
///                   to LINES if fewer than four lines are selected, otherwise
///                   to COLS if fewer than four columns are selected.
/// \param sFilter    Name of the resampling filter. Names not contained in the
///                   list of known filters are replaced by "lanczos3".
/// \return true after resampling.
/// \throws SyntaxError (INVALID_INDEX) if one of the indices is invalid.
///
/// NOTE(review): the embedded listing numbers skip 4331, 4334, 4359, 4361,
/// 4423 and 4470, i.e. the statements guarded by several checks are missing
/// from this extract. As printed here, these conditions would guard whatever
/// statement follows them, which is certainly not intended -- confirm against
/// the original file.
/////////////////////////////////////////////////
4315bool Memory::resample(VectorIndex _vLine, VectorIndex _vCol, std::pair<size_t,size_t> samples, AppDir Direction, std::string sFilter)
4316{
4317 bool bUseAppendedZeroes = false;
4318
4319 int nLinesToInsert = 0;
4320 int nColsToInsert = 0;
4321
 // Names of the filters accepted by this function
4322 static std::vector<std::string> vFilters({"box", "tent", "bell", "bspline", "mitchell", "lanczos3", "blackman",
4323 "lanczos4", "lanczos6", "lanczos12", "kaiser", "gaussian", "catmullrom",
4324 "quadratic_interp", "quadratic_approx", "quadratic_mix"});
4325
 // Unknown filter names silently fall back to the default filter
4326 if (std::find(vFilters.begin(), vFilters.end(), sFilter) == vFilters.end())
4327 sFilter = "lanczos3";
4328
4329 // Avoid border cases
 // NOTE(review): the guarded statements (listing lines 4331 and 4334) are
 // missing here
4330 if (!memArray.size())
4332
4333 if (!samples.first || !samples.second)
4335
4336 if (!_vLine.isValid() || !_vCol.isValid())
4337 throw SyntaxError(SyntaxError::INVALID_INDEX, "resample", SyntaxError::invalid_position, _vLine.to_string() + ", " + _vCol.to_string());
4338
4339 // Evaluate the indices
 // NOTE(review): retouch() and smooth() query _vLine.isOpenEnd() at this
 // point, whereas the column index is queried here -- confirm that this
 // difference is intended
4340 if (_vCol.isOpenEnd())
4341 bUseAppendedZeroes = true;
4342
4343 _vLine.setRange(0, getLines()-1);
4344 _vLine.linearize();
4345 _vCol.setRange(0, getCols()-1);
4346
4347 // Change the predefined application directions, if it's needed
4348 if ((Direction == ALL || Direction == GRID) && _vLine.size() < 4)
4349 Direction = LINES;
4350
4351 if ((Direction == ALL || Direction == GRID) && _vCol.size() < 4)
4352 Direction = COLS;
4353
4354 // If the application direction is equal to GRID, then the indices should
4355 // match a sufficiently large data array
 // NOTE(review): the statements guarded by both conditions (listing lines
 // 4359 and 4361) are missing here
4356 if (Direction == GRID)
4357 {
4358 if (_vCol.size() - 2 != _vLine.size() && !bUseAppendedZeroes)
4360 else if ((!memArray[1] || _vCol.size() - 2 != memArray[1]->size() - _vLine.front()) && bUseAppendedZeroes)
4362 }
4363
4364 // Prepare a pointer to the resampler object
4365 std::unique_ptr<Resampler> _resampler;
4366
4367 // Create the actual resample object based upon the application direction.
4368 // Additionally determine the size of the resampling buffer, which might
4369 // be larger than the current data set
4370 if (Direction == ALL || Direction == GRID) // 2D
4371 {
4372 if (Direction == GRID)
4373 {
4374 // Apply the resampling to the first two columns first:
4375 // These contain the axis values
 // (sFilter is not passed on here, so the default argument of the
 // declaration is used for the axis columns)
4376 resample(_vLine, VectorIndex(_vCol[0]), samples, COLS);
4377 resample(_vLine, VectorIndex(_vCol[1]), std::make_pair(samples.second, samples.first), COLS);
4378
4379 // Increment the first column
4380 _vCol = _vCol.subidx(2);
4381 _vCol.linearize();
4382
4383 // Determine the size of the buffer
4384 if (samples.first > _vLine.size())
4385 nLinesToInsert = samples.first - _vLine.size();
4386
4387 if (samples.second > _vCol.size())
4388 nColsToInsert = samples.second - _vCol.size();
4389 }
4390
4391 // Create the resample object and prepare the needed memory
4392 _resampler.reset(new Resampler(_vCol.size(), _vLine.size(),
4393 samples.second, samples.first,
4394 Resampler::BOUNDARY_CLAMP, 1.0, 0.0, sFilter.c_str()));
4395 }
4396 else if (Direction == COLS) // cols
4397 {
4398 _vCol.linearize();
4399
4400 // Create the resample object and prepare the needed memory
 // (the number of columns is kept, only the number of lines changes)
4401 _resampler.reset(new Resampler(_vCol.size(), _vLine.size(),
4402 _vCol.size(), samples.first,
4403 Resampler::BOUNDARY_CLAMP, 1.0, 0.0, sFilter.c_str()));
4404
4405 // Determine final size (only upscale)
4406 if (samples.first > _vLine.size())
4407 nLinesToInsert = samples.first - _vLine.size();
4408 }
4409 else if (Direction == LINES)// lines
4410 {
4411 // Create the resample object and prepare the needed memory
 // (the number of lines is kept, only the number of columns changes)
4412 _resampler.reset(new Resampler(_vCol.size(), _vLine.size(),
4413 samples.second, _vLine.size(),
4414 Resampler::BOUNDARY_CLAMP, 1.0, 0.0, sFilter.c_str()));
4415
4416 // Determine final size (only upscale)
4417 if (samples.second > _vCol.size())
4418 nColsToInsert = samples.second - _vCol.size();
4419 }
4420
4421 // Ensure that the resampler was created
 // NOTE(review): the guarded statement (listing line 4423) is missing here
4422 if (!_resampler)
4424
4425 // Create and initialize the dynamic memory: inserted rows and columns
4426 if (nLinesToInsert)
4427 {
4428 for (size_t j = 0; j < _vCol.size(); j++)
4429 {
 // NOTE(review): this test is only true, if the column index lies
 // beyond the end of memArray, in which case memArray[_vCol[j]] is
 // an out-of-range access. The comparison looks inverted ('>' was
 // presumably intended); as written, existing columns never receive
 // the additional lines here -- confirm.
4430 if ((int)memArray.size() < _vCol[j] && memArray[_vCol[j]])
4431 memArray[_vCol[j]]->insertElements(_vLine.last()+1, nLinesToInsert);
4432 }
4433 }
4434
4435 if (nColsToInsert)
4436 {
 // Insert the additional (still unallocated) columns right behind the
 // last selected column
4437 TableColumnArray arr(nColsToInsert);
4438 memArray.insert(memArray.begin()+_vCol.last()+1, std::make_move_iterator(arr.begin()), std::make_move_iterator(arr.end()));
4439 }
4440
4441 // resampler output buffer
4442 const double* dOutputSamples = 0;
4443 std::vector<double> dInputSamples(_vCol.size());
4444 int _ret_line = 0;
4445 int _final_cols = 0;
4446
4447 // Determine the number of final columns. These will stay constant only in
4448 // the column application direction
4449 if (Direction == ALL || Direction == GRID || Direction == LINES)
4450 _final_cols = samples.second;
4451 else
4452 _final_cols = _vCol.size();
4453
4454 // Resample the data table
4455 // Apply the resampling linewise
4456 for (size_t i = 0; i < _vLine.size(); i++)
4457 {
 // Collect the real parts of the current line
4458 for (size_t j = 0; j < _vCol.size(); j++)
4459 {
4460 dInputSamples[j] = readMem(_vLine[i], _vCol[j]).real();
4461 }
4462
4463 // If the resampler doesn't accept a further line
4464 // the buffer is probably full
4465 if (!_resampler->put_line(&dInputSamples[0]))
4466 {
4467 if (_resampler->status() != Resampler::STATUS_SCAN_BUFFER_FULL)
4468 {
4469 // Obviously not the case
 // NOTE(review): the statement of this branch (listing line 4470)
 // is missing here
4471 }
4472 else if (_resampler->status() == Resampler::STATUS_SCAN_BUFFER_FULL)
4473 {
4474 // Free the scan buffer of the resampler by extracting the already resampled lines
4475 while (true)
4476 {
4477 dOutputSamples = _resampler->get_line();
4478
4479 // dOutputSamples will be a nullptr, if no more resampled
4480 // lines are available
4481 if (!dOutputSamples)
4482 break;
4483
 // The results are written as a contiguous block starting at the
 // first selected line and column
4484 for (int _fin = 0; _fin < _final_cols; _fin++)
4485 {
4486 writeData(_vLine.front()+_ret_line, _vCol.front()+_fin, dOutputSamples[_fin]);
4487 }
4488
4489 _ret_line++;
4490 }
4491
4492 // Try again to put the current line
 // (the result of this second attempt is not checked)
4493 _resampler->put_line(&dInputSamples[0]);
4494 }
4495 }
4496 }
4497
4498 // Extract the remaining resampled lines from the resampler's memory
4499 while (true)
4500 {
4501 dOutputSamples = _resampler->get_line();
4502
4503 // dOutputSamples will be a nullptr, if no more resampled
4504 // lines are available
4505 if (!dOutputSamples)
4506 break;
4507
4508 for (int _fin = 0; _fin < _final_cols; _fin++)
4509 {
4510 writeData(_vLine.front()+_ret_line, _vCol.front()+_fin, dOutputSamples[_fin]);
4511 }
4512
4513 _ret_line++;
4514 }
4515
4516 // Delete empty lines
 // (the lines of the range left over after downsampling)
4517 if (Direction != LINES && samples.first < _vLine.size())
4518 deleteBulk(VectorIndex(_vLine.front() + samples.first, _vLine.last()), _vCol);
4519
4520 // Delete empty cols
 // (the columns of the range left over after downsampling)
4521 if (Direction != COLS && samples.second < _vCol.size())
4522 deleteBulk(_vLine, VectorIndex(_vCol.front() + samples.second, _vCol.last()));
4523
4524 // Reset the calculated lines and columns
4525 nCalcLines = -1;
4526 m_meta.modify();
4527
4528 return true;
4529}
4530
4531
A table column containing categorical values.
virtual size_t size() const override
Returns the number of elements in this column (will also count invalid ones).
A table column containing numerical values formatted as dates and times.
A table column containing logical values.
This class defines a dynamic size 2D matrix with a single 1D internal buffer. If the internal buffer ...
This class represents a single table in memory, or a - so to say - single memory page to be handled b...
Definition: memory.hpp:68
void writeDataDirect(int _nLine, int _nCol, const mu::value_type &_dData)
This member function provides an unsafe but direct way of writing data to the table....
Definition: memory.cpp:1249
ValueVector readMixedMem(const VectorIndex &_vLine, const VectorIndex &_vCol) const
This member function returns the elements stored at the selected positions.
Definition: memory.cpp:554
std::vector< mu::value_type > size(const VectorIndex &_vIndex, int dir) const
Implementation of the SIZE multi argument function.
Definition: memory.cpp:2873
size_t getHeadlineCount() const
This member function returns the number of lines needed for the table column headline of the selected...
Definition: memory.cpp:1153
~Memory()
Memory class destructor, which will free the allocated memory.
Definition: memory.cpp:73
NumeRe::Table extractTable(const std::string &_sTable, const VectorIndex &lines, const VectorIndex &cols)
Create a copy-efficient table object from the data contents.
Definition: memory.cpp:1803
bool getSaveStatus() const
Returns, whether the contents of the current table are already saved into either a usual file or into...
Definition: memory.cpp:1009
bool retouch(VectorIndex _vLine, VectorIndex _vCol, AppDir Direction=ALL)
This method is the retouching main method. It will redirect the control into the specialized member f...
Definition: memory.cpp:3680
std::vector< mu::value_type > getBins(size_t col, size_t nBins) const
Calculate the number of elements per bin in the selected column.
Definition: memory.cpp:3602
bool bSaveMutex
Definition: memory.hpp:94
std::vector< mu::value_type > countIfEqual(const VectorIndex &_vCols, const std::vector< mu::value_type > &vValues, const std::vector< std::string > &vStringValues) const
Counts all values in the selected columns, which match to the passed values (either numerically or st...
Definition: memory.cpp:3172
int getFilledElemsInColumn(size_t col) const
Returns the number of filled elements in the selected column without the trailing but with the intern...
Definition: memory.cpp:311
mu::value_type num(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the NUM multi argument function.
Definition: memory.cpp:2288
bool retouch1D(const VectorIndex &_vLine, const VectorIndex &_vCol, AppDir Direction)
This member function retouches single dimension data (along columns or rows).
Definition: memory.cpp:3744
void writeComment(const std::string &comment)
Update the comment associated with this table.
Definition: memory.cpp:1094
mu::value_type readMemInterpolated(double _dLine, double _dCol) const
This member function returns a (bilinearly) interpolated element at the selected double positions.
Definition: memory.cpp:398
std::string getComment() const
Return the comment associated with this table.
Definition: memory.cpp:1189
mu::value_type sum(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the SUM multi argument function.
Definition: memory.cpp:2254
int getLines(bool _bFull=false) const
This member function will return the number of lines, which are currently available in this table.
Definition: memory.cpp:258
bool convertColumns(const VectorIndex &_vCol, const std::string &_sType)
This member function tries to convert the selected columns to the target column type,...
Definition: memory.cpp:916
mu::value_type xor_func(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the XOR multi argument function.
Definition: memory.cpp:2446
NumeRe::TableMetaData getMetaData() const
Return the internal meta data structure.
Definition: memory.cpp:1202
mu::value_type std(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the STD multi argument function.
Definition: memory.cpp:2099
void markModified()
Mark this table as modified.
Definition: memory.cpp:1121
int getCols(bool _bFull=false) const
This member function will return the number of columns, which are currently available in this table.
Definition: memory.cpp:241
TableColumnArray memArray
Definition: memory.hpp:89
bool isValid() const
Returns true, if at least a single valid value is available in this table.
Definition: memory.cpp:833
std::vector< mu::value_type > getZScore(size_t col, const VectorIndex &_vIndex) const
Calculate the standardized values of the selected column.
Definition: memory.cpp:3575
mu::value_type norm(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the NORM multi argument function.
Definition: memory.cpp:2551
mu::value_type min(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the MIN multi argument function.
Definition: memory.cpp:2185
mu::value_type cmp(const VectorIndex &_vLine, const VectorIndex &_vCol, mu::value_type dRef=0.0, int _nType=0) const
Implementation for the CMP multi argument function.
Definition: memory.cpp:2587
Memory()
Default constructor.
Definition: memory.cpp:48
void writeData(int _nLine, int _nCol, const mu::value_type &_dData)
This member function writes the passed value to the selected position. The table is automatically enl...
Definition: memory.cpp:1219
mu::value_type pct(const VectorIndex &_vLine, const VectorIndex &_vCol, mu::value_type dPct=0.5) const
Implementation for the PCT multi argument function.
Definition: memory.cpp:2803
bool retouch2D(const VectorIndex &_vLine, const VectorIndex &_vCol)
This member function retouches two dimensional data (using a specialized filter class instance).
Definition: memory.cpp:3866
virtual bool isValue(int line, int col) override
Override for the virtual Sorter class member function. Returns true, if the selected element is a val...
Definition: memory.cpp:1786
size_t getSize() const
Returns the overall used number of bytes for this table.
Definition: memory.cpp:327
mu::value_type prd(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the PRD multi argument function.
Definition: memory.cpp:2220
void importTable(NumeRe::Table _table, const VectorIndex &lines, const VectorIndex &cols)
Import data from a copy-efficient table object. Completely replaces the contents, which were in the i...
Definition: memory.cpp:1834
long long int getLastSaved() const
This member function returns the point in time at which the table was last saved.
Definition: memory.cpp:1534
bool isValidElement(size_t _nLine, size_t _nCol) const
Returns true, if the element at the selected positions is valid. Only checks internally,...
Definition: memory.cpp:817
void writeDataDirectUnsafe(int _nLine, int _nCol, const mu::value_type &_dData)
This member function provides an even more unsafe but direct way of writing data to the table....
Definition: memory.cpp:1271
void reorderColumn(const VectorIndex &vIndex, int i1, int i2, int j1=0)
This member function simply reorders the contents of the selected column using the passed index vecto...
Definition: memory.cpp:1744
bool clear()
This member function frees the internally used memory block completely.
Definition: memory.cpp:141
mu::value_type readMem(size_t _nLine, size_t _nCol) const
This member function returns the element stored at the selected position.
Definition: memory.cpp:350
Boundary findValidBoundary(const VectorIndex &_vLine, const VectorIndex &_vCol, int i, int j) const
This member function finds the smallest possible boundary around a set of invalid values to be used a...
Definition: memory.cpp:3949
AppDir
Definition: memory.hpp:71
@ GRID
Definition: memory.hpp:75
@ LINES
Definition: memory.hpp:73
@ COLS
Definition: memory.hpp:74
@ ALL
Definition: memory.hpp:72
mu::value_type avg(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the AVG multi argument function.
Definition: memory.cpp:2132
Matrix readMemAsMatrix(const VectorIndex &_vLine, const VectorIndex &_vCol) const
This member function returns the elements stored at the selected positions as a Matrix.
Definition: memory.cpp:497
mu::value_type or_func(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the OR multi argument function.
Definition: memory.cpp:2394
std::vector< mu::value_type > minpos(const VectorIndex &_vIndex, int dir) const
Implementation of the MINPOS multi argument function.
Definition: memory.cpp:2946
void setSaveStatus(bool _bIsSaved)
This member function changes the saved state to the passed value.
Definition: memory.cpp:1518
mu::value_type getSpearmanCorr(size_t col1, const VectorIndex &_vIndex1, size_t col2, const VectorIndex &_vIndex2) const
Implements the scorr() table method and calculates the spearman correlation coefficient of the two se...
Definition: memory.cpp:3437
AnovaResult getOneWayAnova(size_t colCategories, size_t colValues, const VectorIndex &_vIndex, double significance) const
Calculates the simplest form of an ANOVA F test.
Definition: memory.cpp:3288
bool smooth(VectorIndex _vLine, VectorIndex _vCol, NumeRe::FilterSettings _settings, AppDir Direction=ALL)
This member function smoothes the data described by the passed VectorIndex indices using the passed F...
Definition: memory.cpp:4138
mu::value_type getCovariance(size_t col1, const VectorIndex &_vIndex1, size_t col2, const VectorIndex &_vIndex2) const
Implements the cov() table method and calculates the covariance of the two selected columns.
Definition: memory.cpp:3380
bool onlyValidValues(const VectorIndex &_vLine, const VectorIndex &_vCol) const
This method is a wrapper for detecting, whether a row or column does only contain valid values (no Na...
Definition: memory.cpp:3930
void writeSingletonData(Indices &_idx, const mu::value_type &_dData)
This member function writes multiple copies of a single value to a range in the table....
Definition: memory.cpp:1394
std::vector< mu::value_type > maxpos(const VectorIndex &_vIndex, int dir) const
Implementation of the MAXPOS multi argument function.
Definition: memory.cpp:3027
std::vector< mu::value_type > getRank(size_t col, const VectorIndex &_vIndex, RankingStrategy _strat) const
Rank the selected column according to the selected ranking strategy.
Definition: memory.cpp:3501
mu::value_type med(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the MED multi argument function.
Definition: memory.cpp:2736
mu::value_type getPearsonCorr(size_t col1, const VectorIndex &_vIndex1, size_t col2, const VectorIndex &_vIndex2) const
Implements the pcorr() table method and calculates the pearson correlation coefficient of the two sel...
Definition: memory.cpp:3413
mu::value_type cnt(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the CNT multi argument function.
Definition: memory.cpp:2508
void copyElementsInto(std::vector< mu::value_type > *vTarget, const VectorIndex &_vLine, const VectorIndex &_vCol) const
This member function will copy the selected elements into the passed vector instance....
Definition: memory.cpp:768
int getElemsInColumn(size_t col) const
Returns the number of elements in the selected column (but might contain invalid values).
Definition: memory.cpp:293
TableColumn::ColumnType getType(const VectorIndex &_vCol) const
Returns the "common" type of the selected columns.
Definition: memory.cpp:657
Memory & operator=(const Memory &other)
Assignment operator.
Definition: memory.cpp:160
bool resizeMemory(size_t _nLines, size_t _nCols)
This member function will handle all memory grow operations by doubling the base size,...
Definition: memory.cpp:221
size_t getAppendedZeroes(size_t _i) const
Returns the number of empty cells at the end of the selected columns.
Definition: memory.cpp:1136
virtual int compare(int i, int j, int col) override
Override for the virtual Sorter class member function. Returns 0, if both elements are equal,...
Definition: memory.cpp:1767
bool Allocate(size_t _nNCols, bool shrink=false)
This member function is the Memory class allocator. It will handle all memory allocations.
Definition: memory.cpp:89
ValueVector getCategoryList(const VectorIndex &_vCol) const
Returns a key-value list containing the categories and their respective index.
Definition: memory.cpp:685
void deleteEntry(int _nLine, int _nCol)
This member function deletes a single entry from the memory table.
Definition: memory.cpp:1952
std::vector< int > sortElements(int i1, int i2, int j1=0, int j2=0, const std::string &sSortingExpression="")
This member function is the interface function for the Sorter class. It will pre-evaluate the passed...
Definition: memory.cpp:1555
void smoothingWindow2D(const VectorIndex &_vLine, const VectorIndex &_vCol, size_t i, size_t j, NumeRe::Filter *_filter)
This private member function realizes the application of a smoothing window to 2D data sets.
Definition: memory.cpp:4069
mu::value_type max(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the MAX multi argument function.
Definition: memory.cpp:2150
RankingStrategy
Definition: memory.hpp:79
@ RANK_DENSE
Definition: memory.hpp:80
@ RANK_COMPETETIVE
Definition: memory.hpp:81
@ RANK_FRACTIONAL
Definition: memory.hpp:82
mu::value_type and_func(const VectorIndex &_vLine, const VectorIndex &_vCol) const
Implementation for the AND multi argument function.
Definition: memory.cpp:2334
bool setCategories(const VectorIndex &_vCol, const std::vector< std::string > &vCategories)
Updates the categories of a categorical column and switches the column type if necessary.
Definition: memory.cpp:963
void calculateStats(const VectorIndex &_vLine, const VectorIndex &_vCol, std::vector< StatsLogic > &operation) const
Driver code for simplifying the calculation of various stats using OpenMP, if possible.
Definition: memory.cpp:2021
bool shrink()
This member function shrinks the table memory to the smallest possible dimensions reachable in powers...
Definition: memory.cpp:847
void setMetaData(const NumeRe::TableMetaData &meta)
Update the internal meta data with the passed one.
Definition: memory.cpp:1109
std::vector< mu::value_type > findCols(const std::vector< std::string > &vColNames) const
Finds the column IDs whose headlines match the passed strings. Can return multiple column IDs pe...
Definition: memory.cpp:3140
Memory * extractRange(const VectorIndex &_vLine, const VectorIndex &_vCol) const
This member function extracts a range of this table and returns it as a new Memory instance.
Definition: memory.cpp:723
ValueVector readMemAsString(const VectorIndex &_vLine, const VectorIndex &_vCol) const
This member function returns the elements stored at the selected positions.
Definition: memory.cpp:606
NumeRe::TableMetaData m_meta
Definition: memory.hpp:90
bool save(std::string _sFileName, const std::string &sTableName, unsigned short nPrecision)
This member function is used for saving the contents of this memory page into a file....
Definition: memory.cpp:1897
void createTableHeaders()
This member function creates the column headlines, if they are empty.
Definition: memory.cpp:121
bool setHeadLineElement(size_t _i, const std::string &_sHead)
Writes a new table column headline to the selected column.
Definition: memory.cpp:1068
void convert()
This member function tries to convert all string columns to value columns, if it is possible.
Definition: memory.cpp:888
bool bSortCaseInsensitive
Definition: memory.hpp:95
bool resample(VectorIndex _vLine, VectorIndex _vCol, std::pair< size_t, size_t > samples, AppDir Direction=ALL, std::string sFilter="lanczos3")
This member function resamples the data described by the passed coordinates using the new samples nSa...
Definition: memory.cpp:4315
void smoothingWindow1D(const VectorIndex &_vLine, const VectorIndex &_vCol, size_t i, size_t j, NumeRe::Filter *_filter, bool smoothLines)
This private member function realizes the application of a smoothing window to 1D data sets.
Definition: memory.cpp:4004
int nCalcLines
Definition: memory.hpp:92
std::vector< mu::value_type > getIndex(size_t col, const std::vector< mu::value_type > &vValues, const std::vector< std::string > &vStringValues) const
Determines the positions of all elements, which correspond to the passed values (either numerically o...
Definition: memory.cpp:3233
void deleteBulk(const VectorIndex &_vLine, const VectorIndex &_vCol)
This member function deletes a whole range of entries from the memory table.
Definition: memory.cpp:1982
std::string getHeadLineElement(size_t _i) const
Returns the table column headline for the selected column. Will return a default headline,...
Definition: memory.cpp:1025
This is an abstract base class for any type of a data filter. Requires some methods to be implemented...
Definition: filtering.hpp:117
virtual mu::value_type apply(size_t i, size_t j, const mu::value_type &val) const =0
Virtual method for applying the filter to a distinct value. Has to be implemented in all child classe...
FilterBuffer2D & get2DBuffer()
This method returns the internal filtering buffer queue for 2D data to store already smoothed points ...
Definition: filtering.hpp:234
bool isConvolution() const
This method returns whether the current filter is a convolution, i.e. whether the returned value may ...
Definition: filtering.hpp:181
FilterBuffer & getBuffer()
This method returns the internal filtering buffer queue to store already smoothed points avoiding lea...
Definition: filtering.hpp:220
std::pair< size_t, size_t > getWindowSize() const
This method returns the window size of the current filter as a std::pair in the order (row,...
Definition: filtering.hpp:207
Template class representing a generic file. This class may be specified for the main data type contai...
Definition: file.hpp:68
void setTextfilePrecision(unsigned short nPrecision)
Set the precision, which shall be used to convert the floating point numbers into strings.
Definition: file.hpp:1184
void setTableName(const std::string &name)
Set the table's name.
Definition: file.hpp:1170
void setData(TableColumnArray *data, long long int rows, long long int cols)
This method references the passed external data internally. The data is not copied and must exist as l...
Definition: file.hpp:1229
virtual bool write()=0
Pure virtual declaration of the write access method. Has to be implemented in all derived classes and...
void setDimensions(long long int rows, long long int cols)
Sets the dimensions of the data table, which will be used in the future. Clears the internal memory i...
Definition: file.hpp:1155
std::string getExtension()
Returns the file extension.
Definition: file.hpp:951
This class resembles the binary NumeRe data file format. The data is read and written in binary mode u...
Definition: file.hpp:1471
This class is a specialized WeightedLinearFilter used to retouch missing data values.
Definition: filtering.hpp:838
mu::value_type retouch(size_t i, size_t j, const mu::value_type &val, const mu::value_type &med)
This method is a wrapper to retouch only invalid values. The default value of invalid values is the m...
Definition: filtering.hpp:1172
void setBoundaries(const std::vector< mu::value_type > &left, const std::vector< mu::value_type > &right, const std::vector< mu::value_type > &top=std::vector< mu::value_type >(), const std::vector< mu::value_type > &bottom=std::vector< mu::value_type >())
This method is used to update the internal filter boundaries.
Definition: filtering.hpp:1151
This data container is a copy-efficient table to interchange data between Kernel and GUI.
Definition: table.hpp:87
void setName(const std::string &_sName)
Setter function for the table name.
Definition: table.cpp:186
void setColumn(size_t j, TableColumn *column)
Assigns a whole column to the internal array.
Definition: table.cpp:357
size_t getCols() const
Get the number of columns.
Definition: table.cpp:656
TableMetaData getMetaData() const
Getter function for the table meta data.
Definition: table.cpp:403
void setMetaData(const TableMetaData &meta)
Setter function for the table meta data.
Definition: table.cpp:200
size_t getLines() const
Get the number of lines.
Definition: table.cpp:636
TableColumn * getColumn(size_t j) const
Returns a copy of the internal column array or a nullptr, if the column does not exist or is empty.
Definition: table.cpp:604
static void issueWarning(std::string sWarningMessage)
This static function may be used to issue a warning to the user. The warning will be printed by the t...
Definition: kernel.cpp:2833
@ BOUNDARY_CLAMP
Definition: resampler.h:35
@ STATUS_SCAN_BUFFER_FULL
Definition: resampler.h:43
ColumnKeys * evaluateKeyList(std::string &sKeyList, long long int nColumnCount)
This public member function creates a ColumnKeys object from a string containing the hierarchical sor...
Definition: sorter.cpp:252
bool qSort(int *nIndex, int nElements, int nColumn, long long int nLeft, long long int nRight, int nSign)
This public member function is the interface to the quicksort algorithm, which itself is implemented ...
Definition: sorter.cpp:40
bool sortSubList(int *nIndex, int nElements, ColumnKeys *KeyList, long long int i1, long long int i2, long long int j1, int nSign, long long int nColumns)
This public member function handles the hierarchical sorting process of many columns together....
Definition: sorter.cpp:196
A table column containing only strings as values.
Common exception class for all exceptions thrown in NumeRe.
Definition: error.hpp:32
@ CANNOT_SMOOTH_CACHE
Definition: error.hpp:83
@ INTERNAL_RESAMPLER_ERROR
Definition: error.hpp:123
@ NO_CACHED_DATA
Definition: error.hpp:158
@ CANNOT_SAVE_FILE
Definition: error.hpp:82
@ CANNOT_SORT_CACHE
Definition: error.hpp:84
@ TOO_LARGE_CACHE
Definition: error.hpp:217
@ CANNOT_RESAMPLE_CACHE
Definition: error.hpp:79
@ INVALID_INDEX
Definition: error.hpp:129
static size_t invalid_position
Definition: error.hpp:235
A table column containing only numerical values.
This class abstracts all the index logics, i.e. the logical differences between single indices and in...
Definition: structures.hpp:42
bool isValid() const
This member function determines, whether the internal index set is valid.
Definition: structures.hpp:601
void linearize()
This member function linearizes the contents of a vector-described index set. The vectorial informati...
Definition: structures.hpp:276
VectorIndex subidx(size_t pos, size_t nLen=std::string::npos) const
This member function returns a subset of the internal stored index just like the std::string::substr(...
Definition: structures.hpp:238
void setOpenEndIndex(int nLast) const
This member function can be used to replace the open end state with a defined index value although th...
Definition: structures.hpp:756
void prepend(const std::vector< int > &vVector)
This function will prepend the passed vector before the beginning of the index vector....
Definition: structures.hpp:491
int last() const
This member function returns the last index value, which can be reached by the values stored internal...
Definition: structures.hpp:693
int max() const
This function calculates the maximal index value obtained from the values stored internally.
Definition: structures.hpp:558
size_t size() const
This member function returns the size of the indices stored in this class.
Definition: structures.hpp:314
bool isOpenEnd() const
This member function determines, whether the internal index set has an open end.
Definition: structures.hpp:614
void setRange(int nMin, int nMax)
This member function can be used to force the indices stored internally to be in a defined interval....
Definition: structures.hpp:712
void append(const std::vector< int > &vVector)
This function will append the passed vector to the end of the index vector. The internal storage is e...
Definition: structures.hpp:451
std::string to_string() const
This member function converts the vector indexes contents into a human-readable string representation...
Definition: structures.hpp:770
bool isOrdered() const
This member function determines, whether the single indices are in the correct order.
Definition: structures.hpp:361
bool isExpanded() const
This member function determines, whether the indices are calculated or actual vectorial indices.
Definition: structures.hpp:377
std::vector< int > getVector() const
This member function returns a STL vector, which will resemble the indices stored internally....
Definition: structures.hpp:531
int min() const
This member function calculates the minimal index value obtained from the values stored internally.
Definition: structures.hpp:576
int & front()
This member function returns a reference to the first index value stored internally.
Definition: structures.hpp:640
#define MAX_TABLE_COLS
Definition: memory.cpp:37
static void evaluateRankingStrategy(std::vector< mu::value_type > &vRank, size_t &nEqualRanks, Memory::RankingStrategy _strat)
Evaluate the identically ranked values according to the selected ranking strategy.
Definition: memory.cpp:3465
#define DEFAULT_COL_TYPE
Definition: memory.cpp:38
static bool closeEnough(double d1, double d2)
Static helper function to ensure that two doubles are actually close enough to be considered equal.
Definition: memory.cpp:3109
static mu::value_type nanAvg(const std::vector< mu::value_type > &values)
This static helper function calculates the average value respecting NaNs.
Definition: memory.cpp:367
std::vector< std::string > ValueVector
This type defines a generic value vector.
Definition: memory.hpp:58
std::complex< double > intPower(const std::complex< double > &, int)
This function calculates the power of a value with the specialization that the exponent is an integer...
Definition: tools.cpp:3640
GenericFile * getFileByType(const string &filename)
This function determines the correct class to be used for the filename passed to this function....
Definition: file.cpp:45
std::queue< mu::value_type > FilterBuffer
Typedef for simplifying the usage of the buffer.
Definition: filtering.hpp:107
std::queue< std::vector< mu::value_type > > FilterBuffer2D
Definition: filtering.hpp:108
Filter * createFilter(const FilterSettings &_settings)
This function creates an instance of the filter specified by the passed FilterSettings structure.
Definition: filtering.hpp:814
CONSTCD11 std::chrono::duration< Rep, Period > abs(std::chrono::duration< Rep, Period > d)
Definition: date.h:1317
MUP_BASETYPE value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:251
bool isnan(const value_type &v)
Definition: muParserDef.h:379
std::vector< double > real(const std::vector< value_type > &vVec)
value_type rint(value_type v)
bool isreal(value_type *v, int nElem)
Definition: muParserDef.h:384
std::string get(const std::string &sUrl, const std::string &sUserName, const std::string &sPassWord)
Get the contents of a URL.
Definition: http.cpp:251
#define min(a, b)
Definition: resampler.cpp:34
#define max(a, b)
Definition: resampler.cpp:30
int findParameter(const std::string &sCmd, const std::string &sParam, const char cFollowing)
This function searches the passed parameter in the passed command string. If something is found,...
Definition: tools.cpp:113
Contains the relevant results of the ANOVA F test.
Definition: memory.hpp:40
mu::value_type m_FRatio
Definition: memory.hpp:41
mu::value_type m_significance
Definition: memory.hpp:43
mu::value_type m_significanceVal
Definition: memory.hpp:42
bool m_isSignificant
Definition: memory.hpp:44
size_t m_numCategories
Definition: memory.hpp:45
This structure contains the information of a two-dimensional boundary.
long long int m
size_t cols
long long int n
long long int re()
long long int cf()
long long int rf()
size_t rows
long long int ce()
Structure for the sorting functionality: used for the recursive definition of the index columns for s...
ColumnKeys * subkeys
This structure is central for managing the indices of a table or cluster read or write data access....
VectorIndex col
VectorIndex row
This structure contains the necessary information to create an instance of one of the following filte...
Definition: filtering.hpp:40
Encapsulating structure to gather all table meta data information.
Definition: table.hpp:32
__time64_t lastSavedTime
Definition: table.hpp:35
std::string comment
Definition: table.hpp:33
Simplify the creation of some statistics by externalizing the operation code and unifying the driver ...
Definition: statslogic.hpp:30
@ OPERATION_ADDSQSUB
Definition: statslogic.hpp:36
Abstract table column, which allows using it to compose the data table in each Memory instance.
Definition: tablecolumn.hpp:34
std::vector< std::string > getValueAsInternalString(const VectorIndex &idx) const
Returns the table column's contents as a vector containing internal strings.
Definition: tablecolumn.cpp:56
static ColumnType stringToType(const std::string &sType)
Converts the passed string representation to a ColumnType value.
ColumnType m_type
Definition: tablecolumn.hpp:49
static std::string getDefaultColumnHead(size_t colNo)
Creates a default column headline for a column, which can be used without an instance of this class.
std::string m_sHeadLine
Definition: tablecolumn.hpp:48
long long int intCast(const std::complex< double > &)
Casts the real part of the complex number to an integer and avoids rounding errors.
Definition: tools.cpp:1824
std::string toString(int)
Converts an integer to a string without the Settings bloat.
std::unique_ptr< TableColumn > TblColPtr
Typedef for simplifying the usage of a smart pointer in combination with a TableColumn instance.
std::vector< TblColPtr > TableColumnArray
This typedef represents the actual table, which is implemented using a std::vector.
void convert_for_overwrite(TblColPtr &col, size_t colNo, TableColumn::ColumnType type)
This function deletes the contents of a column, if necessary, and creates a new column with the corre...
void convert_if_empty(TblColPtr &col, size_t colNo, TableColumn::ColumnType type)
Tries to convert a column if the column does not contain any data (with the exception of the header).
string getArgAtPos(const string &sCmd, unsigned int nPos, int extraction)
Extracts an option's value at the selected position and applies automatic parsing, if necessary.
Definition: tools.cpp:1598
size_t qSortDouble(double *dArray, size_t nlength)
This is a wrapper for the standard qsort algorithm. It returns the number of valid elements and sorts...
Definition: tools.cpp:3762