Overview
Presto is an open-source application that allows for querying of data stored in data storage platforms such as HDFS and S3 using SQL. Presto can be used to query Capsule8 investigations data directly out of HDFS.
Requirements
- HDFS Cluster
- Hive Metastore setup
- Presto Installed
- SSH access to the Presto coordinator and worker servers.
- Completed Setting up Investigations with Hadoop
1. Hive Configuration
The first step is to configure the Presto cluster to connect to a Hive metastore. This can be done using the Hive connector: https://prestodb.io/docs/current/connector/hive.html. Additionally, the catalog config must have hive.recursive-directories=true
set. This allows data written into Hive-partitioned folders by the Capsule8 sensor to be scanned by Presto.
2. Create Tables
The second step is to create a schema (Presto's term for a database). This can be done in the Presto CLI with:
CREATE SCHEMA hive.presto;
And then set it as the default schema:
USE hive.presto;
With the database created, the next step is to add the tables. This can be done by entering the following DDL statements in Presto CLI:
Note: hadoop-nn
is the default namenode hostname and should be replaced with the namenode of the HDFS cluster that contains the files to be queried.
ALERT table
-- Alerts emitted by Capsule8; external table over Parquet files in HDFS.
CREATE TABLE IF NOT EXISTS hive.presto.alerts (
    unix_nano_timestamp bigint,   -- event time; presumably nanoseconds since Unix epoch (per column name)
    alert_id varchar,
    sensor_id varchar,
    priority varchar,
    process_c8id varchar,
    incident_id varchar,
    policy_type varchar,
    strategy_name varchar,
    audit_group_id varchar,
    messages array(varchar)
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/alerts',  -- replace hadoop-nn with your namenode
    format = 'PARQUET'
);
CONNECTIONS table
-- Network connection events observed by the sensor; external Parquet table.
CREATE TABLE IF NOT EXISTS hive.presto.connections (
    unix_nano_timestamp bigint,   -- event time; presumably nanoseconds since Unix epoch (per column name)
    process_c8id varchar,
    process_pid integer,
    monotime_nanos bigint,        -- monotonic clock reading in nanoseconds (per column name)
    dst_addr varchar,
    dst_port integer,
    incident_id varchar,
    success boolean,
    audit_group_id varchar
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/connections',  -- replace hadoop-nn with your namenode
    format = 'PARQUET'
);
CONTAINER_EVENTS table
-- Container lifecycle events (see event_type / event_type_name); external Parquet table.
CREATE TABLE IF NOT EXISTS hive.presto.container_events (
    container_id varchar,
    unix_nano_timestamp bigint,   -- event time; presumably nanoseconds since Unix epoch (per column name)
    incident_id varchar,
    sensor_id varchar,
    process_c8id varchar,
    event_type integer,           -- numeric code; human-readable form in event_type_name
    container_name varchar,
    image_id varchar,
    image_name varchar,
    pod_name varchar,
    namespace varchar,            -- non-reserved word in Presto, safe unquoted
    event_type_name varchar,
    audit_group_id varchar
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/container_events',  -- replace hadoop-nn with your namenode
    format = 'PARQUET'
);
FILE_EVENTS table
-- File-system events captured by the sensor; external Parquet table.
CREATE TABLE IF NOT EXISTS hive.presto.file_events (
    unix_nano_timestamp bigint,   -- event time; presumably nanoseconds since Unix epoch (per column name)
    sensor_id varchar,
    process_c8id varchar,
    pid integer,
    container_id varchar,
    path varchar,
    source_path varchar,          -- presumably set for rename/link-style events; verify against sensor docs
    event_type integer,           -- numeric code; human-readable form in event_type_name
    incident_id varchar,
    event_type_name varchar,
    audit_group_id varchar
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/file_events',  -- replace hadoop-nn with your namenode
    format = 'PARQUET'
);
LOST_RECORDS table
-- Records the sensor dropped (lost_count per reason); external Parquet table.
CREATE TABLE IF NOT EXISTS hive.presto.lost_records (
    unix_nano_timestamp bigint,   -- event time; presumably nanoseconds since Unix epoch (per column name)
    event_type integer,           -- numeric code; human-readable form in event_type_name
    action_type integer,          -- numeric code; human-readable form in action_type_name
    reason varchar,
    lost_count bigint,
    event_type_name varchar,
    action_type_name varchar,
    sensor_id varchar
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/lost_records',  -- replace hadoop-nn with your namenode
    format = 'PARQUET'
);
PROCESS_EVENTS table
-- Process lifecycle events (exec/fork/exit etc. per event_type_name); external Parquet table.
-- NOTE: "group" is a reserved keyword in Presto and must be double-quoted as an
-- identifier; the original unquoted form fails to parse.
CREATE TABLE IF NOT EXISTS hive.presto.process_events (
    unix_nano_timestamp bigint,   -- event time; presumably nanoseconds since Unix epoch (per column name)
    event_type integer,           -- numeric code; human-readable form in event_type_name
    process_c8id varchar,
    sensor_id varchar,
    pid integer,
    container_id varchar,
    parent_process_c8id varchar,
    parent_pid integer,
    gid integer,
    "group" varchar,              -- reserved keyword, must stay quoted
    uid integer,
    username varchar,
    euid integer,
    effective_username varchar,
    fsuid integer,
    fs_username varchar,
    fsgid integer,
    fs_group varchar,
    login_uid integer,
    login_username varchar,
    login_gid integer,
    login_group varchar,
    path varchar,
    arguments array(varchar),
    incident_id varchar,
    child_pid integer,
    child_process_c8id varchar,
    egid integer,
    effective_group varchar,
    tid integer,
    return_value integer,
    event_type_name varchar,
    audit_group_id varchar
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/process_events',  -- replace hadoop-nn with your namenode
    format = 'PARQUET'
);
SENSORS table
-- Registered Capsule8 sensors; external Parquet table.
CREATE TABLE IF NOT EXISTS hive.presto.sensors (
    unix_nano_timestamp bigint,   -- event time; presumably nanoseconds since Unix epoch (per column name)
    sensor_id varchar,
    sensor_pid integer,
    hostname varchar
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/sensors',  -- replace hadoop-nn with your namenode
    format = 'PARQUET'
);
SENSOR_METADATA table
-- Key/value metadata per sensor; external Parquet table.
-- "key" and "value" are non-reserved words in Presto, so they are safe unquoted.
CREATE TABLE IF NOT EXISTS hive.presto.sensor_metadata (
    sensor_id varchar,
    key varchar,
    value varchar
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/sensor_metadata',  -- replace hadoop-nn with your namenode
    format = 'PARQUET'
);
SHELL_COMMANDS table
-- Commands executed in interactive shells; external Parquet table.
-- NOTE: "group" is a reserved keyword in Presto and must be double-quoted as an
-- identifier; the original unquoted form fails to parse.
CREATE TABLE IF NOT EXISTS hive.presto.shell_commands (
    unix_nano_timestamp bigint,   -- event time; presumably nanoseconds since Unix epoch (per column name)
    program_filename varchar,
    program_arguments array(varchar),
    process_c8id varchar,
    process_pid integer,
    shell_process_c8id varchar,
    shell_process_pid integer,
    exec_event_id varchar,
    monotime_nanos bigint,        -- monotonic clock reading in nanoseconds (per column name)
    container_id varchar,
    sensor_id varchar,
    uid integer,
    username varchar,
    gid integer,
    "group" varchar,              -- reserved keyword, must stay quoted
    euid integer,
    effective_username varchar,
    egid integer,
    effective_group varchar,
    suid integer,
    saved_username varchar,
    sgid integer,
    saved_group varchar,
    fsuid integer,
    file_system_username varchar,
    fsgid integer,
    file_system_group varchar,
    incident_id varchar,
    audit_group_id varchar
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/shell_commands',  -- replace hadoop-nn with your namenode
    format = 'PARQUET'
);
TTY_DATA table
-- Raw TTY input/output captured from shell sessions; external Parquet table.
CREATE TABLE IF NOT EXISTS hive.presto.tty_data (
    unix_nano_timestamp bigint,   -- event time; presumably nanoseconds since Unix epoch (per column name)
    data varchar,
    is_input boolean,             -- true for keystrokes sent to the TTY, false for output (per column name)
    monotime_nanos bigint,        -- monotonic clock reading in nanoseconds (per column name)
    shell_process_c8id varchar,
    incident_id varchar,
    audit_group_id varchar
)
WITH (
    external_location = 'hdfs://hadoop-nn:9000/c8/tty_data',  -- replace hadoop-nn with your namenode
    format = 'PARQUET'
);
Comments
0 comments
Please sign in to leave a comment.