das2C
das core C utilities (v3)
dataset.h
Go to the documentation of this file.
1 /* Copyright (C) 2017-2018 Chris Piker <chris-piker@uiowa.edu>
2  *
3  * This file is part of das2C, the Core Das2 C Library.
4  *
5  * das2C is free software; you can redistribute it and/or modify it under
6  * the terms of the GNU Lesser General Public License version 2.1 as published
7  * by the Free Software Foundation.
8  *
9  * das2C is distributed in the hope that it will be useful, but WITHOUT ANY
10  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * version 2.1 along with das2C; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 
21 #ifndef _das_dataset_h_
22 #define _das_dataset_h_
23 
24 #include <das2/dimension.h>
25 #include <das2/codec.h>
26 
27 #ifdef __cplusplus
28 extern "C" {
29 #endif
30 
31 /* Old initial comment that kicked off the entire das2 data model design...
32  *
33  * The structures below are the start of an idea on how to get independent
34  * parameters for data at any particular index. These are just thoughts
35  * at the moment and don't affect any working code. There are many ways
36  * to do this. The CDF and QStream assumption is that there are the same
37  * number of parameters locating a data point in parameter space as there
38  * are indices to the dataset. Because of this x,y,z scatter data are
39  * hard to handle.
40  *
41  * For x,y,z scatter lists there is 1 index for any point in the dataset,
42  * but for each index there are 2 independent parameters. Basically QStream
43  * and CDF assume that all datasets are CUBEs in parameter space but this
44  * is not the case for a great many sets.
45  *
46  * To adequately handle these 'path' datasets a parameter map is required.
47  * The mapping takes 1 index value per data rank and returns 1 to N parameter
48  * values.
49  *
50  * These structures start to handle this idea but are just doodles at this
51  * point. -cwp 2017-07-25
52  */
53 
54 /* Second comment that added desire for flexible data types ...
55  *
56  * Thinking about coordinate returns, how about a data set of thefts / month
57  * in 5 cities... Won't usually come up, but should be possible
58  * to handle. Here's the data set:
59  *
60  * 2016-01 2016-02 2016-03 2016-04 2016-05 2016-06
61  * Baltimore 2351 3789 4625 5525 6135 5902
62  * Bogotá 109065 110365 99625 98265 43850 33892
63  * Chicago 4789 5764 8901 10145 13456 22678
64  * Des Moines 4 10 33 35 44 107
65  *
66  * Properties: Title -> "Thefts/Month for selected cities"
67  *
68  * Okay, the X axis data type is text[12] (need null char)
69  * Y axis data type is datetime
70  * Z axis data type is datum, "thefts month**-1"
71  *
72  * So what is the return value from pDs->bin(pDs, 0, 0) ?
73  *
74  * The bin is defined on the space of all UTC times, and on the space of all
75  * cities in the data set.
76  *
77  *
78  * So, what about this common data set, interference events:
79  *
80  * |<---------- Bin ------>| |<----- Value --->|
81  * 2016-01-01T14:00 2016-01-02T02:20 Mag Roll
82  * 2016-01-01T15:40 2016-01-01T15:41 Stabilization Pulse
83  * 2016-01-01T15:43 2016-01-01T15:44 Stabilization Pulse
84  * 2016-01-01T15:45 2016-01-01T15:47 Stabilization Pulse
85  * 2016-01-01T15:48 2016-01-01T15:50 Stabilization Pulse
86  *
87  * So what is the return value from pDs->bin(pDs, 0) ?
88  *
89  * The space is UTC time, So each bin start and stop is defined on the space
90  * of all UTC times.
91  * -cwp 2017-??-??
92  */
93 
94 /* Number of encoders that can be stored internally, more than this and they
95  * have to be allocated on the heap. This is the common "small vector"
96  * optimization */
97 #define DASDS_LOC_ENC_SZ 32
98 
139 typedef struct dataset { /* Das dataset: descriptor base, index rank, ids, dimensions, arrays and packet codecs */
140  DasDesc base; /* This would be equivalent to the properties for
141  a packet descriptor. Typically in das 2.2 packets
142  don't have a descriptor, only streams and planes
143  but access to the stream descriptor forwards through
144  here. */
145 
146  int nRank; /* The number of whole-dataset index dimensions.
147  * Variables can define internal dimensions but they
148  * can't use indices in the first nRank positions for
149  * internal use, as these are used to correlate values
150  * across the dataset. */
151 
152  /* A text identifier for this instance of a data set */
153  char sId[DAS_MAX_ID_BUFSZ];
154 
155  /* A text identifier for the join group for this dataset. Datasets with
156  * the same group ID should be joined automatically by display clients.
157  */
158  char sGroupId[DAS_MAX_ID_BUFSZ];
159 
160  size_t uDims; /* Number of dimensions, das datasets are
161  * implicitly bundles in qdataset terms. */
162 
163  DasDim** lDims; /* The data variable object arrays */
164  size_t uSzDims; /* Current size of dimension array */
165 
166  size_t uArrays; /* The number of low-level arrays */
167  DasAry** lArrays; /* An array of array objects */
168  size_t uSzArrays; /* NOTE(review): presumably the current size of lArrays, mirroring uSzDims - confirm */
169 
170  ptrdiff_t _shape[DASIDX_MAX]; /* Cached result of shape calls, kept for speed */
171 
172  bool _dynamic; /* If true, the dataset may still be changing and all
173  bulk properties such as the iteration shape should be
174  recalculated instead of using cached values.
175  If false, cached values are expected to already be
176  available */
177 
178  /* Dataset arrays can be written in chunks to output buffers. The number of
179  * elements in each chunk, the encoding of each element, and any separators
180  * are defined below. */
181  /* DasCodec** lEncs; */
182 
183  /* Use a fixed size for now */
184  size_t uSzEncs; /* NOTE(review): presumably the number of codec slots in use - confirm */
185  DasCodec aPktEncs[DASDS_LOC_ENC_SZ]; /* In-struct codec storage, DASDS_LOC_ENC_SZ slots */
186  int nPktItems[DASDS_LOC_ENC_SZ]; /* One int per codec slot; NOTE(review): presumably items per packet for each codec - confirm */
187 
195  void* pUser; /* User data pointer */
196 
197 } DasDs;
198 
233 DAS_API DasDs* new_DasDs( /* Create a new dataset object */
234  const char* sId, const char* sGroupId, int nRank
235 );
236 
246 DAS_API void del_DasDs(DasDs* pThis); /* Delete a dataset object, cleaning up its memory */
247 
264 DAS_API void DasDs_setMutable(DasDs* pThis, bool bChangeAllowed); /* Lock/unlock the dataset for changes */
265 
266 
271 #define DasDs_mutable(P) ((P)->_dynamic) /* True while the dataset may still change; reads DasDs._dynamic (the struct has no _mutable member) */
272 
316 DAS_API int DasDs_shape(const DasDs* pThis, ptrdiff_t* pShape); /* Return current valid ranges for whole data set iteration */
317 
335 DAS_API ptrdiff_t DasDs_lengthIn(const DasDs* pThis, int nIdx, ptrdiff_t* pLoc); /* Return the current maximum index value + 1 for any partial index */
336 
375 typedef struct dasds_iterator_t{ /* Dataset iterator structure, used with dasds_iter_init() and dasds_iter_next() */
376 
378  bool done; /* Iteration-finished flag. NOTE(review): the generated docs say "true means index is valid", which contradicts the name - confirm */
379 
382  ptrdiff_t index[DASIDX_MAX]; /* Current position, one entry per dataset index */
383 
384  int rank;
385  ptrdiff_t shape[DASIDX_MAX]; /* Used for CUBIC datasets */
386  ptrdiff_t nLenIn; /* Used for ragged datasets */
387  bool ragged;
388  const DasDs* pDs; /* The dataset being iterated; const, so not modified through the iterator */
389 } dasds_iterator;
390 
407 DAS_API void dasds_iter_init(dasds_iterator* pIter, const DasDs* pDs); /* Initialize a const dataset iterator */
408 
425 DAS_API bool dasds_iter_next(dasds_iterator* pIter); /* Increment the iterator's index by one position, rolling as needed at data boundaries */
426 
427 
444 #define DasDs_group(P) ((const char*)(P)->sGroupId) /* Read-only view of the dataset's join group id string */
445 
452 #define DasDs_id(P) ((const char*)(P)->sId) /* Read-only view of the dataset's own id string */
453 
454 
471 #define DasDs_rank(P) ((P)->nRank) /* Number of whole-dataset index dimensions */
472 
493 DAS_API DasErrCode DasDs_addAry(DasDs* pThis, DasAry* pAry); /* Add an array to the dataset, stealing its reference */
494 
495 
500 #define DasDs_numAry(P) ((P)->uArrays) /* Number of low-level arrays held by the dataset */
501 
509 #define DasDs_getAry(P, I) ((P)->lArrays[(I)]) /* Array pointer at position I; no bounds check is performed */
510 
511 
526 DAS_API DasAry* DasDs_getAryById(DasDs* pThis, const char* sAryId); /* Get a dataset array given its identifier */
527 
549 DAS_API size_t DasDs_memUsed(const DasDs* pThis); /* Get the currently used memory of all arrays in the dataset */
550 
555 DAS_API size_t DasDs_memIndexed(const DasDs* pThis); /* The apparent memory usage of all arrays in the dataset */
556 
576 DAS_API size_t DasDs_memOwned(const DasDs* pThis); /* Get the currently allocated memory of all arrays in the dataset */
577 
578 
613 DAS_API DasErrCode DasDs_addFixedCodec( /* Define a packet data encoder/decoder for fixed length items and arrays */
614  DasDs* pThis, const char* sAryId, const char* sSemantic,
615  const char* sEncType, int nItemBytes, int nNumItems
616 );
617 
651 DAS_API DasErrCode DasDs_addRaggedCodec( /* Define a packet data encoder for variable length items and arrays */
652  DasDs* pThis, const char* sAryId, const char* sEncType,
653  int nItemBytes, int nSeps, ubyte uSepLen, const ubyte* pSepByIdx
654 );
655 
669 DAS_API size_t DasDs_clearRagged0Arrays(DasDs* pThis); /* Clear any arrays that are ragged in index 0 (NOTE(review): index taken from the "Ragged0" name - confirm) */
670 
671 
694 DAS_API DasDim* DasDs_makeDim( /* Make a new dimension within this dataset */
695  DasDs* pThis, enum dim_type dType, const char* sDim, const char* sId
696 );
697 
712 DAS_API DasErrCode DasDs_addDim(DasDs* pThis, DasDim* pDim); /* Add a physical dimension to the dataset */
713 
721 DAS_API size_t DasDs_numDims(const DasDs* pThis, enum dim_type vt); /* Get the number of physical dimensions in this dataset */
722 
723 
730 DAS_API const DasDim* DasDs_getDim(const DasDs* pThis, const char* sDim); /* Get a dimension by its basic kind */
731 
739 DAS_API const DasDim* DasDs_getDimByIdx( /* Get a dimension by index */
740  const DasDs* pThis, size_t idx, enum dim_type vt
741 );
742 
750 DAS_API const DasDim* DasDs_getDimById(const DasDs* pThis, const char* sId); /* Get a dimension by string id */
751 
752 
761 DAS_API char* DasDs_toStr(const DasDs* pThis, char* sBuf, int nLen); /* Print a string representation of this dataset into sBuf */
762 
763 
764 
765 /* Ideas I'm still working on...
766 
767 
768 / * The two functions below are really useful but I'll need to crack open
769  a double pack of Flex and Bison to get it done so I'm punting for now. * /
770 
771 / * Ex Expression: $spec_dens[i][j][k] * /
772 const Function* Dataset_evalDataExp(Dataset* pThis, const char* sExpression);
773 
774 / * Ex Expression: $craft_alt[i][j] - 0.5 * $delay_time[k] * 299792 * /
775 const Function* Dataset_evalCoordExp(Dataset* pThis, const char* sExpression);
776 
777 
778 / **
779  *
780  * This function answers the question by either providing the spanning set of
781  * coordinates or returning nothing.
782  * For a dataset to be defined
783  * on a coordinate grid there must exist one coordinate set for each index in
784  * the data set and each coordinate must be a function of only one index.
785  *
786  * Non-gridded data can still be sliced but coordinate slices will need to be
787  * produced as well in order to plot the slice. See Dataset_orthogonal()
788  *
789  * @param pThis A correlated dataset object
790  * @param sDs The string id of the dataset in question
791  * @param[out] psCoords a pointer to a const char* array to receive the
792  * coordinate ID's forming the spanning set. Note that every
793  * combination of returned coordinates satisfies the orthogonal
794  * condition and would return true from Dataset_orthogonal().
795  *
796  * @return The number of spanning coordinates. Will be equal to the
797  * rank of the dataset.
798  * /
799 size_t Dataset_gridCoords(
800  const Dataset* pThis, const char* sDs, const char** psCoords
801 );
802 
803 bool DataGen_grid(const DataSet* pDataset);
804 
805 const DataSet** Dg_griddedIn(const DataSet* pDataset);
806 
807 
808 
809 / ** Get the coefficients for iterating over a 1-D slice of a regular (i.e.
810  * non-ragged) dataset.
811  *
812  * This function does not work for ragged datasets and merely returns NULL if
813  * asked for iteration coefficients for such a set. In such a case use
814  * Dataset_copySlice1D().
815  *
816  * /
817 const void* Dataset_slice1D(
818  const Dataset* pThis, const char* sDs, const char* sCoord, int iCoordIdx,
819  int* pCoeff
820 );
821 
822 / ** Increment the reference count on any array objects that are part of
823  * a data space.
824  *
825  * This is useful in instances where the underlying data arrays are going
826  * to be represented by an organizational structure other than datasets
827  * and DataSets since Das array objects only free data memory if their
828  * reference count is zero.
829  *
830  * @param pThis
831  * /
832 void DataSpace_incAryRef(Dataset* pThis);
833 
834 / * Need a way to trigger callbacks from datasets changing, not just
835  packets changing. It could be useful to work on items from the
836  dataset level instead of just the packet level * /
837  bool DataSpace_stream(Dataset* pThis); * /
838 
839 
840 
841 / ** Indicate the physical degrees of freedom for a dataset by denoting a
842  * complete list of coordinate sets.
843  *
844  * A list of coordinates over which an entire dataset is defined is called
845  * a span. Datasets may have 1-N spans.
846  * /
847 int DataSpace_addSpan(const char* sDsId, const char** lCoords, size_t nCoords);
848 */
849 
850 #ifdef __cplusplus
851 }
852 #endif
853 
854 #endif /* _das_dataset_h */
#define DASIDX_MAX
The maximum number of array indices in das2.
Definition: array.h:43
Encoding/Decoding arrays to and from buffers.
DAS_API size_t DasDs_memIndexed(const DasDs *pThis)
The apparent memory usage of all arrays in the dataset.
DAS_API DasErrCode DasDs_addDim(DasDs *pThis, DasDim *pDim)
Add a physical dimension to the dataset.
int DasErrCode
return code type 0 indicates success, negative integer indicates failure
Definition: defs.h:164
Dynamic recursive ragged arrays.
Definition: array.h:270
DAS_API size_t DasDs_clearRagged0Arrays(DasDs *pThis)
Clear any arrays that are ragged in index 0.
Reading and writing array data to buffers.
Definition: codec.h:40
Base structure for Stream Header Items.
Definition: descriptor.h:74
Das Physical Dimensions.
Definition: dimension.h:128
Das Datasets.
Definition: dataset.h:139
DAS_API DasErrCode DasDs_addRaggedCodec(DasDs *pThis, const char *sAryId, const char *sEncType, int nItemBytes, int nSeps, ubyte uSepLen, const ubyte *pSepByIdx)
Define a packet data encoder for variable length items and arrays.
DAS_API const DasDim * DasDs_getDimByIdx(const DasDs *pThis, size_t idx, enum dim_type vt)
Get a dimension by index.
DAS_API DasDim * DasDs_makeDim(DasDs *pThis, enum dim_type dType, const char *sDim, const char *sId)
Make a new dimension within this dataset.
DAS_API const DasDim * DasDs_getDimById(const DasDs *pThis, const char *sId)
Get a dimension by string id.
DAS_API DasDs * new_DasDs(const char *sId, const char *sGroupId, int nRank)
Create a new dataset object.
DAS_API size_t DasDs_memUsed(const DasDs *pThis)
Get the currently used memory of all arrays in the dataset.
DAS_API void del_DasDs(DasDs *pThis)
Delete a Data object, cleaning up its memory.
DAS_API char * DasDs_toStr(const DasDs *pThis, char *sBuf, int nLen)
Print a string representation of this dataset.
DAS_API size_t DasDs_memOwned(const DasDs *pThis)
Get the currently allocated memory of all arrays in the dataset.
DAS_API void DasDs_setMutable(DasDs *pThis, bool bChangeAllowed)
Lock/Unlock the dataset for changes.
DAS_API DasErrCode DasDs_addAry(DasDs *pThis, DasAry *pAry)
Add an array to the dataset, stealing its reference.
DAS_API size_t DasDs_numDims(const DasDs *pThis, enum dim_type vt)
Get the number of physical dimensions in this dataset.
DAS_API DasErrCode DasDs_addFixedCodec(DasDs *pThis, const char *sAryId, const char *sSemantic, const char *sEncType, int nItemBytes, int nNumItems)
Define a packet data encoder/decoder for fixed length items and arrays.
DAS_API const DasDim * DasDs_getDim(const DasDs *pThis, const char *sDim)
Get a dimension by its basic kind.
DAS_API int DasDs_shape(const DasDs *pThis, ptrdiff_t *pShape)
Return current valid ranges for whole data set iteration.
DAS_API DasAry * DasDs_getAryById(DasDs *pThis, const char *sAryId)
Get a dataset array given its identifier.
DAS_API ptrdiff_t DasDs_lengthIn(const DasDs *pThis, int nIdx, ptrdiff_t *pLoc)
Return the current maximum index value + 1 for any partial index.
void * pUser
User data pointer.
Definition: dataset.h:195
Dataset iterator structure.
Definition: dataset.h:375
bool done
If true, iteration is complete and the value in index is no longer valid; if false, the value in index is valid.
Definition: dataset.h:378
DAS_API bool dasds_iter_next(dasds_iterator *pIter)
Increment the iterator's index by one position, rolling as needed at data boundaries.
DAS_API void dasds_iter_init(dasds_iterator *pIter, const DasDs *pDs)
Initialize a const dataset iterator.
#define DAS_MAX_ID_BUFSZ
The size of an char buffer large enough to hold valid object IDs.
Definition: util.h:311