Python · 1784 bytes Raw Blame History
1 #!/usr/bin/env python3
2 """
3 Data processing utilities
4 """
5
6 import numpy as np
7 import pandas as pd
8
def load_data(filename, use_cache=True):
    """Load a CSV file into a DataFrame, optionally through a pickle cache.

    The caching signature is kept as the merge resolution because it is a
    superset of the one-argument form: ``load_data(path)`` remains a valid
    call.

    Args:
        filename: Path of the CSV file, given as a string (the cache path is
            built by string concatenation).
        use_cache: When true, return the contents of ``filename + '.cache'``
            if that file exists; otherwise parse the CSV and write the cache
            file so the next call can reuse it. When false, the cache is
            neither read nor written.

    Returns:
        The DataFrame read from the cache or parsed from the CSV.

    Note:
        The cache is never invalidated. After editing the CSV, delete the
        ``.cache`` file or call with ``use_cache=False``.
    """
    if not use_cache:
        return pd.read_csv(filename)

    cache_file = filename + '.cache'
    try:
        # SECURITY: unpickling can execute arbitrary code. Only rely on the
        # cache where untrusted parties cannot plant a ``.cache`` file.
        return pd.read_pickle(cache_file)
    except FileNotFoundError:
        # No cache yet; fall through and build it from the original file.
        pass

    data = pd.read_csv(filename)
    data.to_pickle(cache_file)
    return data
30
class DataProcessor:
    """Hold a dataset and normalize it in place before saving it as CSV."""

    def __init__(self, data):
        """Store the dataset and mark it as not yet processed.

        Args:
            data: The dataset to process. The methods below call ``min``,
                ``max``, ``mean``, ``std`` and ``to_csv`` on it, so a pandas
                DataFrame is presumably expected.
        """
        self.data = data
        self.processed = False

    def normalize(self, method='minmax'):
        """Normalize ``self.data`` in place using the chosen method.

        The ``method`` parameter is kept as the merge resolution: calling
        ``normalize()`` with no argument still works, and z-score scaling is
        available through ``method='zscore'``.

        Args:
            method: ``'minmax'`` rescales with ``(x - min) / (max - min)``;
                ``'zscore'`` rescales with ``(x - mean) / std``.

        Raises:
            ValueError: If ``method`` is neither ``'minmax'`` nor ``'zscore'``.
                ``self.data`` and ``self.processed`` are left untouched.

        Note:
            A zero range or zero standard deviation is not special-cased, so
            the division result for such data is whatever the underlying
            data type produces (NaN for pandas float columns).
        """
        if method == 'minmax':
            lowest = self.data.min()
            span = self.data.max() - lowest
            self.data = (self.data - lowest) / span
        elif method == 'zscore':
            self.data = (self.data - self.data.mean()) / self.data.std()
        else:
            raise ValueError(f"Unknown normalization method: {method}")

        self.processed = True

    def save(self, filename):
        """Write ``self.data`` to ``filename`` as CSV without the index column."""
        self.data.to_csv(filename, index=False)
59
if __name__ == "__main__":
    # Script entry point: load input.csv, normalize it with the default
    # settings, and write the result to output.csv.
    processor = DataProcessor(load_data("input.csv"))
    processor.normalize()
    processor.save("output.csv")
64 processor.save("output.csv")