news 2026/6/7 23:48:53

资金管理平台概率性现金流预测模型(机器学习)

作者头像

张小明

前端开发工程师

1.2k 24
文章封面图
资金管理平台概率性现金流预测模型(机器学习)
特征工程
class CashFlowFeatureEngineer:
    """Feature engineering for cash-flow forecasting.

    Builds one feature row per target date out of three groups: calendar
    attributes of the date, business indicators, and trailing statistics of
    the historical cash flow.

    NOTE(review): the ``_get_*`` data-access hooks called by
    ``_extract_business_features`` are not defined in this class. They must
    be supplied elsewhere (e.g. by a subclass) before ``create_features``
    can run.
    """

    # Trailing window lengths, in calendar days, used for rolling statistics.
    WINDOWS = (7, 14, 30, 60, 90, 180)

    # Statistic prefixes emitted for every window, in column order.
    _WINDOW_STATS = ('mean', 'std', 'min', 'max', 'median', 'skew', 'kurt')

    def create_features(self, historical_data: pd.DataFrame,
                        future_dates: pd.DatetimeIndex) -> pd.DataFrame:
        """Create the machine-learning feature matrix.

        Args:
            historical_data: Historical cash-flow data indexed by date. The
                ``cash_flow`` column holds the realised cash flow; additional
                columns are ignored here.
            future_dates: Dates for which a feature row is required.

        Returns:
            A DataFrame indexed by ``date`` with one row per requested date.
            The set of columns is the same for every row, missing values are
            represented as NaN.
        """
        features = []

        for date in future_dates:
            # Basic calendar features.
            base_features = {
                'date': date,
                'year': date.year,
                'month': date.month,
                'day': date.day,
                'day_of_week': date.dayofweek,
                'day_of_year': date.dayofyear,
                'week_of_year': date.isocalendar().week,
                'quarter': date.quarter,
                'is_month_start': date.is_month_start,
                'is_month_end': date.is_month_end,
                'is_quarter_start': date.is_quarter_start,
                'is_quarter_end': date.is_quarter_end,
                'is_year_start': date.is_year_start,
                'is_year_end': date.is_year_end,
            }

            # Business features (require business data).
            business_features = self._extract_business_features(date)

            # Statistics of the historical series.
            historical_features = self._extract_historical_features(date, historical_data)

            features.append({**base_features, **business_features, **historical_features})

        if not features:
            # Without rows there is no 'date' column to promote to the index.
            return pd.DataFrame(index=pd.DatetimeIndex([], name='date'))

        return pd.DataFrame(features).set_index('date')

    def _extract_business_features(self, date: datetime) -> Dict:
        """Collect business indicators for ``date``.

        The values come from the ``_get_*`` hooks (the original notes say the
        data is sourced from CRM / project-management systems).
        """
        return {
            'expected_sales': self._get_expected_sales(date),
            'project_count': self._get_active_project_count(date),
            'employee_count': self._get_employee_count(date),
            'seasonality_factor': self._get_seasonality_factor(date.month),
            'working_days_in_month': self._get_working_days(date.year, date.month)
        }

    @staticmethod
    def _autocorr_or_nan(series: pd.Series, lag: int) -> float:
        """Return the lag-``lag`` autocorrelation, or NaN if the series is too short."""
        # A correlation needs at least two overlapping (value, lagged value) pairs.
        if len(series) < lag + 2:
            return np.nan
        return series.autocorr(lag=lag)

    def _extract_historical_features(self, date: datetime, historical_data: pd.DataFrame) -> Dict:
        """Derive trailing-window features from the history preceding ``date``.

        Only observations strictly before ``date`` are used, so the target
        day itself never leaks into its own features. Every key is always
        present; values that cannot be computed are NaN.

        Args:
            date: The date the features are built for.
            historical_data: Date-indexed frame with a ``cash_flow`` column.

        Returns:
            Dict mapping feature name to value.
        """
        # Last day that may be used as input for ``date``.
        end_date = date - timedelta(days=1)
        cash_flow = historical_data['cash_flow']
        index = historical_data.index

        features = {}

        for window in self.WINDOWS:
            # Both bounds are inclusive, so an N-day window starts N-1 days
            # before its last day.
            start_date = end_date - timedelta(days=window - 1)
            mask = (index >= start_date) & (index <= end_date)
            window_data = cash_flow.loc[mask]

            if len(window_data) > 0:
                stats = {
                    'mean': window_data.mean(),
                    'std': window_data.std(),
                    'min': window_data.min(),
                    'max': window_data.max(),
                    'median': window_data.median(),
                    'skew': window_data.skew(),
                    'kurt': window_data.kurtosis(),
                }
            else:
                stats = dict.fromkeys(self._WINDOW_STATS, np.nan)

            for stat_name in self._WINDOW_STATS:
                features[f'{stat_name}_{window}d'] = stats[stat_name]

            # Autocorrelation features.
            features[f'autocorr_7_{window}d'] = self._autocorr_or_nan(window_data, 7)
            features[f'autocorr_30_{window}d'] = self._autocorr_or_nan(window_data, 30)

        # Year-over-year feature: cash flow on the same calendar day last year.
        if date.month == 2 and date.day == 29:
            # Feb 29 has no counterpart in the previous year; use Feb 28.
            last_year_date = datetime(date.year - 1, 2, 28)
        else:
            last_year_date = datetime(date.year - 1, date.month, date.day)

        if last_year_date in index:
            features['yoy'] = historical_data.loc[last_year_date, 'cash_flow']
        else:
            features['yoy'] = np.nan

        # Moving-average ratio (short term / long term).
        short_mean = features.get('mean_30d', np.nan)
        long_mean = features.get('mean_90d', np.nan)
        if long_mean != 0:
            features['ma_ratio_30_90'] = short_mean / long_mean
        else:
            features['ma_ratio_30_90'] = np.nan

        return features
XGBoost预测模型
import warnings

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')


class XGBoostCashFlowPredictor:
    """Cash-flow predictor based on XGBoost.

    Typical use: ``prepare_data`` (fits the scaler and returns the scaled
    training matrix) -> ``train`` (on that scaled matrix) -> ``predict``
    (on raw, unscaled features; scaling is applied internally).
    """

    # Quantiles used for the prediction interval: lower, median, upper.
    _QUANTILES = (0.05, 0.5, 0.95)

    def __init__(self):
        self.model = None
        self.feature_importance = None
        self.scaler = StandardScaler()
        # Per-column medians learned in prepare_data, reused at predict time.
        self._fill_values = None
        # Training data kept so quantile models can be fitted on demand.
        self._train_X = None
        self._train_y = None
        # Fitted quantile models keyed by quantile.
        self._quantile_models = {}

    def prepare_data(self, features: pd.DataFrame, target: pd.Series):
        """Impute, scale and return the training data.

        Missing values are replaced by the column median and the scaler is
        fitted on the imputed matrix. The medians are remembered so that
        ``predict`` applies the same imputation.

        Returns:
            Tuple ``(features_scaled, target_values)`` of numpy arrays.
        """
        # Handle missing values.
        self._fill_values = features.median()
        features_filled = features.fillna(self._fill_values)

        # Standardise features.
        features_scaled = self.scaler.fit_transform(features_filled)

        return features_scaled, target.values

    def train(self, X_train, y_train, cv_folds: int = 5):
        """Fit the model with a grid search under time-series cross-validation.

        Args:
            X_train: Training features, expected in the scaled form returned
                by ``prepare_data`` because ``predict`` scales its input.
            y_train: Training targets.
            cv_folds: Number of ``TimeSeriesSplit`` folds.

        Returns:
            The best hyper-parameter combination found by the grid search.
        """
        # Time-series cross-validation keeps each validation fold after its
        # training fold.
        tscv = TimeSeriesSplit(n_splits=cv_folds)

        # XGBoost parameter grid: 3 values for each of 6 parameters, i.e.
        # 729 candidates per fold.
        param_grid = {
            'n_estimators': [100, 200, 300],
            'max_depth': [3, 5, 7],
            'learning_rate': [0.01, 0.05, 0.1],
            'subsample': [0.8, 0.9, 1.0],
            'colsample_bytree': [0.8, 0.9, 1.0],
            'min_child_weight': [1, 3, 5]
        }

        # Base model.
        xgb_model = xgb.XGBRegressor(
            objective='reg:squarederror',
            random_state=42,
            n_jobs=-1
        )

        # Grid search.
        grid_search = GridSearchCV(
            estimator=xgb_model,
            param_grid=param_grid,
            cv=tscv,
            scoring='neg_mean_absolute_error',
            verbose=1,
            n_jobs=-1
        )

        grid_search.fit(X_train, y_train)

        # Best model.
        self.model = grid_search.best_estimator_
        self.feature_importance = self.model.feature_importances_

        # Keep the training data for the quantile models and drop any
        # quantile models fitted for a previous training run.
        self._train_X = X_train
        self._train_y = y_train
        self._quantile_models = {}

        print(f"Best parameters: {grid_search.best_params_}")
        print(f"Best CV score: {-grid_search.best_score_:.2f}")

        return grid_search.best_params_

    def _get_quantile_model(self, quantile: float):
        """Return the quantile-regression model for ``quantile``, fitting it once."""
        model = self._quantile_models.get(quantile)
        if model is None:
            if self._train_X is None or self._train_y is None:
                raise ValueError("Model not trained yet!")
            # Reuse the tuned hyper-parameters, overriding only the objective.
            # Passing the overrides next to **get_params() would supply
            # 'objective' twice.
            params = self.model.get_params()
            params['objective'] = 'reg:quantileerror'
            params['quantile_alpha'] = quantile
            model = xgb.XGBRegressor(**params)
            model.fit(self._train_X, self._train_y)
            self._quantile_models[quantile] = model
        return model

    def predict(self, X_test, return_std: bool = False):
        """Predict cash flow.

        Args:
            X_test: Raw (unscaled) feature matrix with the same columns as
                the data given to ``prepare_data``.
            return_std: When true, return a prediction interval obtained from
                quantile regression instead of a single point forecast.

        Returns:
            The point forecast array, or, when ``return_std`` is true, the
            tuple ``(median, lower, upper)`` for the 0.5 / 0.05 / 0.95
            quantiles.

        Raises:
            ValueError: If the model has not been trained.
        """
        if self.model is None:
            raise ValueError("Model not trained yet!")

        X_test = np.asarray(X_test, dtype=float)
        if self._fill_values is not None:
            # Apply the same median imputation that was used for training.
            fill = self._fill_values.to_numpy(dtype=float)
            X_test = np.where(np.isnan(X_test), fill, X_test)

        X_test_scaled = self.scaler.transform(X_test)

        if not return_std:
            return self.model.predict(X_test_scaled)

        # Prediction interval via one quantile-regression model per quantile.
        lower_bound, median_pred, upper_bound = (
            self._get_quantile_model(q).predict(X_test_scaled)
            for q in self._QUANTILES
        )
        return median_pred, lower_bound, upper_bound

    def evaluate(self, y_true, y_pred):
        """Compute error metrics for a set of predictions.

        Returns:
            Dict with ``MAE``, ``RMSE``, ``MAPE`` (in percent, computed over
            non-zero targets only; NaN if every target is zero) and ``R2``.
        """
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)

        # A percentage error is undefined where the actual value is zero.
        nonzero = y_true != 0
        if nonzero.any():
            mape = np.mean(np.abs((y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero])) * 100
        else:
            mape = np.nan

        metrics = {
            'MAE': mean_absolute_error(y_true, y_pred),
            'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
            'MAPE': mape,
            'R2': r2_score(y_true, y_pred)
        }
        return metrics
Prophet模型集成

python

import pandas as pd
from prophet import Prophet


class ProphetCashFlowPredictor:
    """Cash-flow predictor based on Prophet (handles seasonality and holidays)."""

    def __init__(self):
        self.model = None
        self.holidays = self._prepare_holidays()

    def _prepare_holidays(self):
        """Build the holiday / special-event table passed to Prophet.

        Returns:
            DataFrame with the columns ``holiday``, ``ds``, ``lower_window``
            and ``upper_window``.
        """
        # Chinese public holidays. The list is hard-coded for 2024 only.
        holidays = pd.DataFrame({
            'holiday': 'chinese_holiday',
            'ds': pd.to_datetime([
                '2024-01-01',  # New Year's Day
                '2024-02-10', '2024-02-11', '2024-02-12',  # Spring Festival
                '2024-04-04', '2024-04-05', '2024-04-06',  # Qingming
                '2024-05-01', '2024-05-02', '2024-05-03',  # Labour Day
                '2024-06-10',  # Dragon Boat Festival
                '2024-09-17',  # Mid-Autumn Festival
                '2024-10-01', '2024-10-02', '2024-10-03',  # National Day
            ]),
            'lower_window': 0,
            'upper_window': 1,  # the effect extends to the day after the holiday
        })

        # Month-end / quarter-end effects (financial settlement days),
        # hard-coded for 2023-2025. Selecting from a daily range avoids the
        # frequency aliases whose spelling differs between pandas versions.
        calendar_days = pd.date_range(start='2023-01-01', end='2025-12-31', freq='D')
        period_ends = calendar_days[calendar_days.is_month_end | calendar_days.is_quarter_end]

        financial_effects = pd.DataFrame({
            'holiday': 'financial_period_end',
            'ds': period_ends,
            'lower_window': -2,  # the effect starts two days before settlement
            'upper_window': 0,
        })

        return pd.concat([holidays, financial_effects], ignore_index=True)

    def prepare_data(self, historical_series: pd.Series):
        """Convert a date-indexed series into Prophet's ``ds`` / ``y`` layout.

        Also adds the calendar columns used as extra regressors.
        """
        df = historical_series.reset_index()
        df.columns = ['ds', 'y']

        # Extra regressor candidates.
        df['year'] = df['ds'].dt.year
        df['month'] = df['ds'].dt.month
        df['day_of_week'] = df['ds'].dt.dayofweek
        df['is_month_end'] = df['ds'].dt.is_month_end.astype(int)
        df['is_quarter_end'] = df['ds'].dt.is_quarter_end.astype(int)

        return df

    def train(self, df: pd.DataFrame):
        """Fit the Prophet model on a frame produced by ``prepare_data``.

        Returns:
            The fitted Prophet model (also stored on ``self.model``).
        """
        self.model = Prophet(
            holidays=self.holidays,
            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=False,
            seasonality_mode='multiplicative',
            changepoint_prior_scale=0.05,
            holidays_prior_scale=10,
            seasonality_prior_scale=10,
            mcmc_samples=0
        )

        # Extra regressors; the same columns must be present at predict time.
        self.model.add_regressor('is_month_end')
        self.model.add_regressor('is_quarter_end')

        self.model.fit(df)

        return self.model

    def predict(self, future_periods: int, freq: str = 'D'):
        """Forecast ``future_periods`` steps beyond the end of the training data.

        Args:
            future_periods: Number of periods to forecast.
            freq: Frequency of the forecast steps.

        Returns:
            DataFrame with the columns ``ds``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper``.

        Raises:
            ValueError: If the model has not been trained.
        """
        if self.model is None:
            raise ValueError("Model not trained yet!")

        # Future frame, without the training history.
        future = self.model.make_future_dataframe(
            periods=future_periods,
            freq=freq,
            include_history=False
        )

        # Regressors registered in train() have to be supplied here as well.
        future['is_month_end'] = future['ds'].dt.is_month_end.astype(int)
        future['is_quarter_end'] = future['ds'].dt.is_quarter_end.astype(int)

        forecast = self.model.predict(future)

        return forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
集成预测框架

python

class EnsembleCashFlowForecaster:
    """Ensemble forecasting framework combining XGBoost and Prophet."""

    # Number of trailing observations held out to tune the ensemble weights.
    VALIDATION_SIZE = 90

    def __init__(self):
        self.models = {
            'xgb': XGBoostCashFlowPredictor(),
            'prophet': ProphetCashFlowPredictor(),
            'ensemble': None
        }
        self.weights = None

    def train_ensemble(self, historical_data: pd.DataFrame):
        """Train the base models and optimise the ensemble weights.

        The last ``VALIDATION_SIZE`` rows are held out; both base models are
        fitted on the remainder and the weights are chosen to minimise the
        MSE of the combined forecast on the hold-out.

        Args:
            historical_data: Date-indexed frame with a ``cash_flow`` column.
                NOTE(review): the Prophet hold-out forecast uses daily steps,
                so the index is presumably daily and gap-free; confirm.

        Returns:
            Dict mapping model name to its hold-out predictions.

        Raises:
            ValueError: If there are not enough rows to leave any training
                data after the hold-out split.
        """
        if len(historical_data) <= self.VALIDATION_SIZE:
            raise ValueError(
                f"Need more than {self.VALIDATION_SIZE} rows of history, "
                f"got {len(historical_data)}"
            )

        # 1. Prepare the data.
        feature_engineer = CashFlowFeatureEngineer()

        # The last 90 days form the validation set.
        train_end = historical_data.index[-self.VALIDATION_SIZE]
        train_data = historical_data[historical_data.index < train_end]
        val_data = historical_data[historical_data.index >= train_end]

        # 2. Train the base models.
        predictions = {}

        # XGBoost
        print("Training XGBoost model...")
        X_train = feature_engineer.create_features(train_data, train_data.index)
        y_train = train_data['cash_flow']

        # predict() scales its input, so training has to use the scaled
        # matrix returned by prepare_data rather than the raw features.
        X_train_scaled, y_train_values = self.models['xgb'].prepare_data(X_train, y_train)
        self.models['xgb'].train(X_train_scaled, y_train_values)

        # Predict on the validation set.
        X_val = feature_engineer.create_features(historical_data, val_data.index)
        y_val = val_data['cash_flow']
        predictions['xgb'] = self.models['xgb'].predict(X_val.values)

        # Prophet
        print("Training Prophet model...")
        prophet_df = self.models['prophet'].prepare_data(train_data['cash_flow'])
        self.models['prophet'].train(prophet_df)
        prophet_forecast = self.models['prophet'].predict(len(val_data), 'D')
        predictions['prophet'] = prophet_forecast['yhat'].values

        # 3. Optimise the ensemble weights.
        self.weights = self._optimize_weights(y_val.values, list(predictions.values()))

        print(f"Optimized weights: {self.weights}")

        return predictions

    @staticmethod
    def _stack_predictions(predictions_list):
        """Return the predictions as a float matrix with one column per model."""
        # Casting to float keeps the weighted sum independent of the dtype
        # the individual models happen to return.
        return np.column_stack([np.asarray(pred, dtype=float) for pred in predictions_list])

    def _optimize_weights(self, y_true, predictions_list):
        """Find non-negative weights summing to 1 that minimise the ensemble MSE.

        Args:
            y_true: Observed values.
            predictions_list: One prediction array per model, each aligned
                with ``y_true``.

        Returns:
            Array of weights, one per model, summing to 1. Equal weights are
            returned if the optimiser yields no usable solution.

        Raises:
            ValueError: If ``predictions_list`` is empty.
        """
        from scipy.optimize import minimize

        if len(predictions_list) == 0:
            raise ValueError("predictions_list must contain at least one model")

        y_true = np.asarray(y_true, dtype=float)
        stacked = self._stack_predictions(predictions_list)
        n_models = stacked.shape[1]

        def objective(weights):
            """MSE of the weighted-average prediction."""
            # Normalise so that infeasible trial points are still scored as
            # a proper weighted average.
            total = weights.sum()
            if total > 0:
                weights = weights / total
            residual = y_true - stacked @ weights
            return np.mean(residual ** 2)

        # Constraints: weights sum to 1 and are non-negative.
        initial_weights = np.ones(n_models) / n_models
        bounds = [(0, 1) for _ in range(n_models)]
        constraints = {'type': 'eq', 'fun': lambda w: np.sum(w) - 1}

        result = minimize(
            objective,
            initial_weights,
            bounds=bounds,
            constraints=constraints,
            method='SLSQP'
        )

        # Remove tiny negative values left by the solver, then renormalise.
        weights = np.clip(result.x, 0.0, None)
        total = weights.sum()
        if not np.isfinite(total) or total <= 0:
            return initial_weights
        return weights / total

    def forecast(self, future_dates: pd.DatetimeIndex, historical_data: pd.DataFrame):
        """Generate the ensemble forecast.

        Args:
            future_dates: Dates to forecast. NOTE(review): the Prophet part
                forecasts ``len(future_dates)`` daily steps after the end of
                ``historical_data``, so these dates are presumably the days
                directly following the history; confirm.
            historical_data: Date-indexed frame with a ``cash_flow`` column.

        Returns:
            DataFrame indexed by ``date`` with ``predicted_cash_flow``,
            ``lower_bound``, ``upper_bound`` and ``confidence_interval``
            (the width of the interval).

        Raises:
            ValueError: If ``train_ensemble`` has not been run.
        """
        if self.weights is None:
            raise ValueError("Ensemble not trained yet!")

        # Collect the prediction of every model.
        feature_engineer = CashFlowFeatureEngineer()
        predictions = []

        # XGBoost prediction.
        X_future = feature_engineer.create_features(historical_data, future_dates)
        pred_xgb, lower_xgb, upper_xgb = self.models['xgb'].predict(X_future.values, return_std=True)
        predictions.append(pred_xgb)

        # Prophet prediction. The model is refitted on the full history so
        # that its forecast starts after the last observed date.
        prophet_df = self.models['prophet'].prepare_data(historical_data['cash_flow'])
        self.models['prophet'].train(prophet_df)
        prophet_forecast = self.models['prophet'].predict(len(future_dates), 'D')
        predictions.append(prophet_forecast['yhat'].values)

        # Weighted average of the model predictions.
        ensemble_pred = self._stack_predictions(predictions) @ np.asarray(self.weights, dtype=float)

        # Prediction interval: the widest interval across models.
        lower_bound = np.minimum(lower_xgb, prophet_forecast['yhat_lower'].values)
        upper_bound = np.maximum(upper_xgb, prophet_forecast['yhat_upper'].values)

        result = pd.DataFrame({
            'date': future_dates,
            'predicted_cash_flow': ensemble_pred,
            'lower_bound': lower_bound,
            'upper_bound': upper_bound,
            'confidence_interval': upper_bound - lower_bound
        }).set_index('date')

        return result
版权声明: 本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若内容造成侵权/违法违规/事实不符,请联系邮箱:809451989@qq.com进行投诉反馈,一经查实,立即删除!
网站建设 2026/6/7 8:58:42

Kotaemon日志系统优化:问题排查从未如此简单

Kotaemon日志系统优化：问题排查从未如此简单 在构建智能对话系统时，你是否经历过这样的场景？用户反馈答案质量下降，但翻遍日志却找不到线索；线上请求突然变慢，却无法判断是检索、生成还是工具调用出了问题…

作者头像 李华
网站建设 2026/6/7 11:43:06

4.5 约束优化与拉格朗日乘子法:支持向量机的数学基础

4.5 约束优化与拉格朗日乘子法:支持向量机的数学基础 在许多人工智能与机器学习问题中,我们寻找的最优解不仅需要优化某个目标函数,还必须满足一系列附加条件或限制,这类问题被称为约束优化问题。支持向量机作为经典的监督学习模型,其核心数学形式便是一个带不等式约束的…

作者头像 李华
网站建设 2026/6/7 9:41:37

5.4 信息论核心概念:熵、互信息与KL散度

5.4 信息论核心概念:熵、互信息与KL散度 信息论为定量分析信息的产生、传输、存储和处理提供了严格的数学框架。在人工智能领域,信息论的概念和方法不仅为理解通信和编码问题奠定基础,更重要的是,它们提供了衡量不确定性、信息内容和概率分布之间差异的基本工具,从而深刻…

作者头像 李华
网站建设 2026/6/7 17:39:04

第6.3节 数值计算稳定性:浮点误差、病态条件与数值微分

第6.3节 数值计算稳定性:浮点误差、病态条件与数值微分 在人工智能算法的实现过程中,无论是训练深度神经网络还是求解大规模线性系统,最终都依赖于计算机的有限精度算术。这种有限性使得计算结果与理论真值之间存在不可避免的差异,这种差异统称为数值误差。数值计算稳定性…

作者头像 李华
网站建设 2026/6/6 4:17:47

如何用Kotaemon提升大模型回答的准确率和可信度?

如何用Kotaemon提升大模型回答的准确率和可信度？ 在企业纷纷拥抱生成式AI的今天，一个尖锐的问题始终悬而未决：我们真的能信任大模型给出的答案吗？尤其是在金融、医疗、法律这类容错率极低的领域，一句看似合理却毫无依据…

作者头像 李华
网站建设 2026/6/8 9:02:04

Kotaemon客户投诉处理话术生成

Kotaemon客户投诉处理话术生成 在金融、电商和电信等行业，客服系统每天要面对成千上万的用户咨询与投诉。一个常见的场景是：用户愤怒地发来消息，“你们上个月多扣了我50块钱！”——这时候，如何快速、准确、得体地回应…

作者头像 李华