"""learn.py -- toy transaction-type classifier.

Builds a small in-memory sample of transactions, derives a weekend flag
from the transaction date, trains a RandomForestClassifier to predict
``TransactionType`` from ``Amount``, ``Merchant`` and ``IsWeekend``, and
prints the accuracy and a classification report for the held-out rows.
"""

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Sample data (replace it with your actual dataset)
data = {
    'Amount': [100, 50, 200, 150, 75],
    'Merchant': ['StoreA', 'StoreB', 'StoreA', 'StoreC', 'StoreB'],
    'Date': ['2022-01-01', '2022-01-02', '2022-01-03', '2022-01-04', '2022-01-05'],
    'TransactionType': ['Bensin', 'Matur', 'Bensin', 'Rafmagn', 'Matur'],
}
df = pd.DataFrame(data)

# Preprocessing and feature engineering
# Encode merchant names as integer codes so the model receives numeric input.
le_merchant = LabelEncoder()
df['Merchant'] = le_merchant.fit_transform(df['Merchant'])

# Parse the date strings, then derive the weekday (Monday=0 ... Sunday=6)
# and a 0/1 weekend flag (Saturday=5, Sunday=6).
df['Date'] = pd.to_datetime(df['Date'])
df['DayOfWeek'] = df['Date'].dt.dayofweek
df['IsWeekend'] = df['DayOfWeek'].isin([5, 6]).astype(int)

# Drop the raw date and the intermediate weekday column; only IsWeekend is
# kept as a date-derived feature.
df = df.drop(['Date', 'DayOfWeek'], axis=1)

# Define features and target variable
X = df.drop('TransactionType', axis=1)
y = df['TransactionType']

# Split the data into training and testing sets.
# With the five sample rows, test_size=0.2 leaves a single test row.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train a RandomForestClassifier (replace with your preferred model)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Display classification report.
# zero_division=0 reports 0.0 for classes with no predicted/true samples
# instead of emitting UndefinedMetricWarning, which is expected with such a
# tiny test split.
print('Classification Report:')
print(classification_report(y_test, y_pred, zero_division=0))