Table Of Contents

1.2.3. Data in columns

[1]:
import quasardb
import datetime as dt
import numpy as np
import matplotlib.pyplot as plt
3.4.0
[2]:
sample_size=10
print(quasardb.version())
c = quasardb.Cluster("qdb://qdb-server:2836")

Create a column in table with a double type

[14]:
#create table for this sample
table = c.ts("ts_colum1_1")
columns = [quasardb.ColumnInfo(quasardb.ColumnType.Double, "close")]
table.create(columns)

#create timestamps to associate to timeseries
timestamps = np.array('2019-03-14', dtype=np.datetime64)+ np.arange(sample_size)

# Generate random doubles of timeserie
double_values = np.random.uniform(-100.0, 100.0, sample_size)
# Insert the double timeserie
table.double_insert("close", timestamps.astype('datetime64[ns]'), double_values)

retrieving entire column using a query

[15]:
q = c.query("select * from ts_colum1_1 ")
res = q.run()

for col in res.tables["ts_colum1_1"]:
    # col.name is a string for the name of the column
    # col.data is a numpy array of the proper type
    assert sample_size== len(col.data)
    print(col.name, ": ", col.data)
$timestamp :  ['2019-03-14T00:00:00.000000000' '2019-03-15T00:00:00.000000000'
 '2019-03-16T00:00:00.000000000' '2019-03-17T00:00:00.000000000'
 '2019-03-18T00:00:00.000000000' '2019-03-19T00:00:00.000000000'
 '2019-03-20T00:00:00.000000000' '2019-03-21T00:00:00.000000000'
 '2019-03-22T00:00:00.000000000' '2019-03-23T00:00:00.000000000']
close :  [-23.67141823 -96.01213163 -29.53954644 -10.29428824 -63.11963866
  82.21661339  76.4220324  -84.62654452  17.96786018  36.84414188]

retriving entire column using native python

[16]:
points = table.double_get_ranges("close")
query_timestamps = points[0]
query_values = points[1]
assert len(query_timestamps) == len(query_values)
assert len(query_timestamps) == sample_size
print("returning %d items"  % len(query_timestamps))
returning 10 items
[17]:
plt.style.use('seaborn-whitegrid')
plt.plot(query_timestamps, query_values)
plt.xticks(rotation=90)
plt.show()
../../_images/primer_notebooks_ColumnNumpy_9_0.png

Retrieve a range of 2 days

[18]:
points = table.double_get_ranges("close", [(np.datetime64('2019-03-14', 'ns'),
                                        np.datetime64('2019-03-16', 'ns'))])
query_timestamps = points[0]
query_values = points[1]
assert len(query_timestamps) == len(query_values)
assert len(query_timestamps) == 2
print(query_timestamps,query_values)

['2019-03-14T00:00:00.000000000' '2019-03-15T00:00:00.000000000'] [-23.67141823 -96.01213163]

Retrieve multiple time ranges column

[19]:
points = table.double_get_ranges("close", [(np.datetime64('2019-03-14', 'ns'),
                                        np.datetime64('2019-03-16', 'ns')),
                                           (np.datetime64('2019-03-18', 'ns'),
                                        np.datetime64('2019-03-20', 'ns'))
                                       ])

query_timestamps = points[0]
query_values = points[1]
assert len(query_timestamps) == len(query_values)
assert len(query_timestamps) == 4
print(query_timestamps,query_values)
['2019-03-14T00:00:00.000000000' '2019-03-15T00:00:00.000000000'
 '2019-03-18T00:00:00.000000000' '2019-03-19T00:00:00.000000000'] [-23.67141823 -96.01213163 -63.11963866  82.21661339]

Retrieving column content using iterator If the table is very large ( several millions of lines ) this method streams avoiding memory overflows on your client

[21]:
for row in table.reader(dict=True):
    print(row['close'])
-23.671418234722978
-96.01213163300449
-29.539546435258444
-10.294288239351928
-63.11963866140245
82.21661338790912
76.42203239543664
-84.6265445206698
17.967860182876905
36.84414188124572
[22]:
assert table.column_type_by_id('close')== quasardb.quasardb.ColumnType.Double
[23]:
#tearing down data for this sample
table = c.ts("ts_colum1_1")
table.remove()
[ ]:

arrow_backPrevious
1.2.2. Using Pandas DataFrames
Next arrow_forward
2. Tutorials