import pandas as pd

# ===从hdf中读取1分钟数据
df = pd.read_hdf('/Users/jxing/Desktop/coin_quant_class/data/class6/eos_1min_data.h5', key='all_data')
# 选取某一时间段
df = df[df['candle_begin_time'] >= pd.to_datetime('2017-03-01')]
print(df.head(10))

# 《数据周线转换示意图》

# ===第一种方法:将1分钟数据转为5分钟数据
# 将candle_begin_time设定为index
df.set_index('candle_begin_time', inplace=True)

# 周期转换方法:resample
rule_type = '5T'  # rule='5T':意思是5分钟,意味着转变为5分钟数据
period_df = df[['close']].resample(rule=rule_type).last()  # last:取这5分钟的最后一行数据

# 开、高、低的价格,成交量
period_df['open'] = df['open'].resample(rule=rule_type).first()
period_df['high'] = df['high'].resample(rule=rule_type).max()
period_df['low'] = df['low'].resample(rule=rule_type).min()
period_df['volume'] = df['volume'].resample(rule=rule_type).sum()

period_df = period_df[['open', 'high', 'low', 'close', 'volume']]

# ===第二种方法:将1分钟数据转为5分钟数据
rule_type = '5T'
period_df = df.resample(rule=rule_type, on='candle_begin_time', base=0, label='left', closed='left').agg(
    {'open': 'first',
     'high': 'max',
     'low': 'min',
     'close': 'last',
     'volume': 'sum',
     })
period_df = period_df[['open', 'high', 'low', 'close', 'volume']]
# base参数:帮助确定转换周期开始的时间
# label='left', closed='left',建议统一设置成'left'

# ===去除不必要的数据
# 去除一天都没有交易的周
period_df.dropna(subset=['open'], inplace=True)
# 去除成交量为0的交易周期
period_df = period_df[period_df['volume'] > 0]

# ===rule的取值
"""
    B       business day frequency
    C       custom business day frequency (experimental)
    D       calendar day frequency
    W       weekly frequency
    M       month end frequency
    SM      semi-month end frequency (15th and end of month)
    BM      business month end frequency
    CBM     custom business month end frequency
    MS      month start frequency
    SMS     semi-month start frequency (1st and 15th)
    BMS     business month start frequency
    CBMS    custom business month start frequency
    Q       quarter end frequency
    BQ      business quarter endfrequency
    QS      quarter start frequency
    BQS     business quarter start frequency
    A       year end frequency
    BA      business year end frequency
    AS      year start frequency
    BAS     business year start frequency
    BH      business hour frequency
    H       hourly frequency
    T       minutely frequency
    S       secondly frequency
    L       milliseonds
    U       microseconds
    N       nanoseconds
"""