Step 3: Create data generator functionality
The following are several functions used to generate data.marketcap_
, is used by the seed_
stored procedure to convert a string value for the market capitalization of a stock into a decimal value.
DELIMITER //CREATE OR REPLACE FUNCTION marketcap_to_DECIMAL(s VARCHAR(15))RETURNS DECIMAL(18,2) ASDECLAREm CHAR(1) = SUBSTR(s, LENGTH(s), 1); -- M or Braw_v DECIMAL(18,2) = SUBSTR(s, 2, LENGTH(s) - 1);v DECIMAL(18,2) = NULL;BEGINIF m = "B" THENv = raw_v * 1000;ELSEv = raw_v;END IF;RETURN v;END //DELIMITER ;
The seed_
stored procedure generates rows of seed
trade data based on an integer input value.
The stored procedure generates approximately the same number of trades for each company.
DELIMITER //CREATE OR REPLACE PROCEDURE seed_trades(num_trades INT) RETURNS INT ASDECLAREranked_companies ARRAY(RECORD(symbol CHAR(5), _rank INT));DECLAREq QUERY(symbol CHAR(5), _rank INT) =SELECT symbol, rank() OVER (ORDER BY marketcap_to_DECIMAL(market_cap)) AS _rankFROM companyWHERE LENGTH(symbol) < 5ORDER BY _rank DESC LIMIT 200;i INT = 0;rank_num INT;next_id INT = 1;sym CHAR(5);price_base DECIMAL(18,4);current_prices ARRAY(INT);l ARRAY(RECORD(symbol CHAR(5), _rank INT));BEGINl = collect(q);FOR r IN l LOOPi += 1;rank_num = r._rank;sym = r.symbol;price_base = FLOOR(rand() * 50) + 50;FOR j IN 1..((rank_num / 10) + RAND() * 10) LOOPINSERT trade VALUES(next_id,sym,FLOOR(1 + RAND() * 10) * 100, -- sharesprice_base, -- share_priceDATE_ADD(NOW(), INTERVAL RAND() * 6 HOUR)); -- random time during trading day, roughlynext_id += 1;IF next_id > num_trades THEN RETURN(next_id); END IF;END LOOP;END LOOP;RETURN(next_id);END //DELIMITER ;
This last stored procedure, iter_
, generates additional trade events using a random walk
to determine the next price for each company.
Each iteration inserts new records equal to num_
.iter_
completes, the total number of records in the trade
table will be equal to num_
* iterations
+ num_
.
DELIMITER //CREATE OR REPLACE PROCEDURE iter_stocks(iterations INT) asDECLAREtickers ARRAY(CHAR(5));prices ARRAY(DECIMAL(18,4));last_ids ARRAY(bigINT);counts ARRAY(INT);next_id bigINT = 1;ticker CHAR(5);price DECIMAL(18,4);c INT;rand DECIMAL(18,4);tickers_q QUERY(t CHAR(5), p DECIMAL(18,4), lid BIGINT, c INT) = SELECT stock_symbol, share_price, MIN(id), COUNT(*) FROM trade GROUP BY stock_symbol;q ARRAY(RECORD(t CHAR(5), p DECIMAL(18,4), lid bigINT, c INT));q_count QUERY(c INT) = SELECT COUNT(*) FROM trade;total_c INT;BEGINq = COLLECT(tickers_q);tickers = CREATE_ARRAY(LENGTH(q));prices = CREATE_ARRAY(LENGTH(q));last_ids = CREATE_ARRAY(LENGTH(q));counts = CREATE_ARRAY(LENGTH(q));total_c = SCALAR(q_count);FOR r IN 0..LENGTH(q)-1 LOOPtickers[r] = q[r].t;prices[r] = q[r].p;last_ids[r] = q[r].lid;counts[r] = q[r].c;END LOOP;FOR j IN 0..(iterations-1) LOOPFOR i IN 0..LENGTH(tickers)-1 LOOPticker = tickers[i];price = prices[i];next_id = last_ids[i];c = counts[i];rand = POW(-1, FLOOR(RAND()*2)) * RAND();INSERT INTO tradeSELECT id + total_c, stock_symbol, shares, share_price + rand, trade_time FROM trade WHERE stock_symbol = ticker AND id >= next_id;prices[i] = price + rand;last_ids[i] = next_id + total_c;END LOOP;END LOOP;END //DELIMITER ;
Last modified: March 2, 2021