AsterixDB初识
基本概念
后台启动AsterixDB
cd ~/asterixdb/asterixdb/asterix-server/target/asterix-server-0.9.10-SNAPSHOT-binary-assembly/apache-asterixdb-0.9.10-SNAPSHOT/opt/local/bin
./start-sample-cluster.sh
修改AsterixDB代码后,调试AsterixDB;启动/asterixdb/asterixdb/asterix-app/src/test/java/org/apache/asterix/api/common/AsterixHyracksIntegrationUtil.java
AsterixDB操作流程
-
创建Dataverse
CREATE DATAVERSE Traffic IF NOT EXISTS;
-
使用Dataverse【每一条语句之前执行】
USE Traffic;
-
设置存储类型(B-tree, R-tree等)
CREATE TYPE TrafficRecordType AS { Vehicle_ID: int, v_Vel: float, Location: string };
-
创建数据库
// 设置单一主键 CREATE DATASET TrafficRecords(TrafficRecordType) PRIMARY KEY Vehicle_ID; // Choose an appropriate primary key // 设置复合主键 CREATE DATASET traffic_CSV_set(TrafficRecordType) PRIMARY KEY Vehicle_ID, Frame_ID;
-
创建索引(default: B-tree;可指定为R-tree)
// 在TrafficRecords【dataset】上的GeoLocation字段,建立R-tree索引,名为TrafficRecords_Rtree create index TrafficRecords_Rtree on TrafficRecords(GeoLocation) type RTREE;
-
导入数据(三种方式:直接插入,导入csv文件,转化为ADM格式数据导入)
// insert 数据 insert into TestPoint ({ "Vehicle_ID":1, "v_Vel": 2.2, "Location": "this is a String." }); // 导入csv数据,csv中仅能使用int、double等基本类型 LOAD DATASET TrafficRecords USING localfs (("path"="127.0.0.1:///path/data.csv"), ("format"="delimited-text"), ("delimiter"=","), ("quote"="\""), ("header"="true")); // 导入ADM格式数据 LOAD DATASET Exper2 USING localfs (("path"="127.0.0.1:///path/data.adm"), ("format"="adm"));
-
查询,验证数据是否导入
SELECT * FROM TrafficRecords;
基本操作
-
删除数据类型或dataset
- dataType 由 dataset 使用,因此在删除时需要先删除 dataset , 再删除 dataType
- 删除dataset
drop dataset TrafficRecords;
- 删除dataType
drop type TrafficRecordType;
-
upsert语法,更新
upsert into TrafficLSMRTree_1 ({ "Vehicle_ID": 1, "Frame_ID": 271, // 将该值由270更新为271 "Total_Frames": 569 });
-
delete记录
delete from TrafficLSMRTree_1 where Combined_ID_Time = "1,1118847869000";
-
插入空间数据
... // datatype create type pointType AS{ Vehicle_ID: int, GeoLocation: point }; create dataset TestPoint(pointType) primary key Vehicle_ID; insert into TestPoint ({ "Vehicle_ID":1, "GeoLocation":point("5.2,10.2") }); ... ----------------------------------- GeoLocation格式: "GeoLocation":point(5.2,10.2) N "GeoLocation":point("5.2","10.2") N "GeoLocation":point("5.2,10.2") Y ----------------------------------- 不能导入含有point数据的csv文件
-
ADM格式数据(JSON串)
// adm数据示例: [{ "Vehicle_ID": 809, "v_Vel": 53.5, "Combined_ID_Time": "809,1118847220800", "GeoLocation": point("6452727.119,1871874.94") }]
数据读取
-
按照数据写入时的时间戳进行读取数据
USE TrafficUS101; SELECT * FROM TrafficLSMRTree_1 WHERE Global_Time >= 1118847890000 AND Global_Time <= 1118847900000; // success
-
按照空间位置信息,即空间范围GeoLocation进行划分
USE TrafficUS101; SELECT * FROM TrafficLSMRTree_1 WHERE spatial_intersect(GeoLocation, create_rectangle(create_point(6451060.0, 1871870.0), create_point(6452745.0, 1873415.0)));
-
使用R-tree索引进行数据查询
[AsterixDB中使用函数](AsterixDB – Builtin Functions (apache.org))
// 点查询 SELECT * FROM TrafficRecordsWithGeo WHERE spatial_intersect(GeoLocation, create_point(5.2, 10.3)); // 范围查询(支持矩形、圆形等) SELECT * FROM TestGeoPointCSV1 WHERE spatial_intersect(GeoLocation, create_rectangle(create_point(5.0,10.0), create_point(6.0,11.0))); // 距离查询,这条查询会返回距离点 (5.2, 10.3) 最近的1个记录。 SELECT * FROM TestGeoPointCSV1 ORDER BY spatial_distance(GeoLocation, create_point(5.2, 10.3)) LIMIT 1;