The following are 30 code examples of numpy.nanargmax(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module numpy, or try the search function.
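Before the project examples, here is a minimal, self-contained illustration of the function's behavior (not taken from any of the projects below):

```python
import numpy as np

# nanargmax returns the index of the maximum value while ignoring NaNs;
# plain argmax treats NaN as the maximum and returns its index instead.
a = np.array([3.0, np.nan, 7.5, np.nan, 2.0])
print(np.argmax(a))     # 1 -- index of the first NaN
print(np.nanargmax(a))  # 2 -- index of 7.5, NaNs ignored

# An all-NaN input raises ValueError rather than returning an index.
```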
Example #1
Source File: flight.py | From: traffic | License: MIT License | 6 votes

```python
def _split(
    data: pd.DataFrame, value: Union[str, int], unit: Optional[str]
) -> Iterator[pd.DataFrame]:
    # This method helps splitting a flight into several.
    if data.shape[0] < 2:
        return
    diff = data.timestamp.diff().values
    if unit is None:
        delta = pd.Timedelta(value).to_timedelta64()
    else:
        delta = np.timedelta64(value, unit)
    # There seems to be a change with numpy >= 1.18
    # max() now may return NaN, therefore the following fix
    max_ = np.nanmax(diff)
    if max_ > delta:
        # np.nanargmax seems bugged with timestamps
        argmax = np.where(diff == max_)[0][0]
        yield from _split(data.iloc[:argmax], value, unit)
        yield from _split(data.iloc[argmax:], value, unit)  # noqa
    else:
        yield data
# flake B008
```
Example #2
Source File: cis.py | From: tensorqtl | License: BSD 3-Clause "New" or "Revised" License | 6 votes

```python
def calculate_cis_permutations(genotypes_t, phenotype_t, permutation_ix_t, residualizer=None):
    """Calculate nominal and empirical correlations"""
    permutations_t = phenotype_t[permutation_ix_t]

    r_nominal_t, genotype_var_t, phenotype_var_t = calculate_corr(
        genotypes_t, phenotype_t.reshape(1, -1),
        residualizer=residualizer, return_var=True
    )
    std_ratio_t = torch.sqrt(phenotype_var_t.reshape(1, -1) / genotype_var_t.reshape(-1, 1))
    r_nominal_t = r_nominal_t.squeeze(dim=-1)
    std_ratio_t = std_ratio_t.squeeze(dim=-1)

    corr_t = calculate_corr(
        genotypes_t, permutations_t, residualizer=residualizer
    ).pow(2)  # genotypes x permutations
    corr_t = corr_t[~torch.isnan(corr_t).any(1), :]
    if corr_t.shape[0] == 0:
        raise ValueError('All correlations resulted in NaN. Please check phenotype values.')
    r2_perm_t, _ = corr_t.max(0)  # maximum correlation across permutations

    r2_nominal_t = r_nominal_t.pow(2)
    r2_nominal_t[torch.isnan(r2_nominal_t)] = -1  # workaround for nanargmax()
    ix = r2_nominal_t.argmax()
    return r_nominal_t[ix], std_ratio_t[ix], ix, r2_perm_t, genotypes_t[ix]
```
Example #3
Source File: persistence.py | From: caml-mimic | License: MIT License | 6 votes

```python
def save_everything(args, metrics_hist_all, model, model_dir, params, criterion, evaluate=False):
    """
        Save metrics, model, params all in model_dir
    """
    save_metrics(metrics_hist_all, model_dir)
    params['model_dir'] = model_dir
    save_params_dict(params)
    if not evaluate:
        # save the model with the best criterion metric
        if not np.all(np.isnan(metrics_hist_all[0][criterion])):
            if criterion == 'loss_dev':
                eval_val = np.nanargmin(metrics_hist_all[0][criterion])
            else:
                eval_val = np.nanargmax(metrics_hist_all[0][criterion])
            if eval_val == len(metrics_hist_all[0][criterion]) - 1:
                # save state dict
                sd = model.cpu().state_dict()
                torch.save(sd, model_dir + "/model_best_%s.pth" % criterion)
                if args.gpu:
                    model.cuda()
    print("saved metrics, params, model to directory %s\n" % (model_dir))
```
Example #4
Source File: center_initializer.py | From: pyclustering | License: GNU General Public License v3.0 | 6 votes

```python
def __get_next_center(self, centers):
    """!
    @brief Calculates the next center for the data.

    @param[in] centers (array_like): Current initialized centers represented by indexes.

    @return (array_like) Next initialized center.<br>
            (uint) Index of next initialized center if return_index is True.

    """
    distances = self.__calculate_shortest_distances(self.__data, centers)

    if self.__candidates == kmeans_plusplus_initializer.FARTHEST_CENTER_CANDIDATE:
        for index_point in centers:
            distances[index_point] = numpy.nan
        center_index = numpy.nanargmax(distances)
    else:
        probabilities = self.__calculate_probabilities(distances)
        center_index = self.__get_probable_center(distances, probabilities)

    return center_index
```
Example #5
Source File: Callbacks.py | From: GroundedTranslation | License: BSD 3-Clause "New" or "Revised" License | 5 votes

```python
def early_stop_decision(self, epoch, val_metric, val_loss):
    '''
    Stop training if validation loss has stopped decreasing and
    validation BLEU score has not increased for --patience epochs.

    WARNING: quits with sys.exit(0).

    TODO: this doesn't yet support early stopping based on TER
    '''
    if val_loss < self.best_val_loss:
        self.wait = 0
    elif val_metric > self.best_val_metric or self.args.no_early_stopping:
        self.wait = 0
    else:
        self.wait += 1
        if self.wait >= self.patience:
            # we have exceeded patience
            if val_loss > self.best_val_loss:
                # and loss is no longer decreasing
                logger.info("Epoch %d: early stopping", epoch)
                handle = open("checkpoints/%s/summary" % self.args.run_string, "a")
                handle.write("Early stopping because patience exceeded\n")
                best_bleu = np.nanargmax(self.val_metric)
                best_loss = np.nanargmin(self.val_loss)
                logger.info("Best Metric: %d | val loss %.5f score %.2f",
                            best_bleu + 1, self.val_loss[best_bleu],
                            self.val_metric[best_bleu])
                logger.info("Best loss: %d | val loss %.5f score %.2f",
                            best_loss + 1, self.val_loss[best_loss],
                            self.val_metric[best_loss])
                handle.close()
                sys.exit(0)
```
Example #6
Source File: Callbacks.py | From: GroundedTranslation | License: BSD 3-Clause "New" or "Revised" License | 5 votes

```python
def log_performance(self):
    '''
    Record model performance so far, based on validation loss.
    '''
    handle = open("checkpoints/%s/summary" % self.args.run_string, "w")

    for epoch in range(len(self.val_loss)):
        handle.write("Checkpoint %d | val loss: %.5f bleu %.2f\n"
                     % (epoch + 1, self.val_loss[epoch], self.val_metric[epoch]))

    logger.info("---")  # break up the presentation for clarity

    # BLEU is the quickest indicator of performance for our task
    # but loss is our objective function
    best_bleu = np.nanargmax(self.val_metric)
    best_loss = np.nanargmin(self.val_loss)
    logger.info("Best Metric: %d | val loss %.5f score %.2f",
                best_bleu + 1, self.val_loss[best_bleu], self.val_metric[best_bleu])
    handle.write("Best Metric: %d | val loss %.5f score %.2f\n"
                 % (best_bleu + 1, self.val_loss[best_bleu], self.val_metric[best_bleu]))
    logger.info("Best loss: %d | val loss %.5f score %.2f",
                best_loss + 1, self.val_loss[best_loss], self.val_metric[best_loss])
    handle.write("Best loss: %d | val loss %.5f score %.2f\n"
                 % (best_loss + 1, self.val_loss[best_loss], self.val_metric[best_loss]))
    logger.info("Early stopping marker: wait/patience: %d/%d\n",
                self.wait, self.patience)
    handle.write("Early stopping marker: wait/patience: %d/%d\n"
                 % (self.wait, self.patience))
    handle.close()
```
Example #7
Source File: test_interaction.py | From: recruit | License: Apache License 2.0 | 5 votes

```python
def test_nanfunctions_matrices_general():
    # Check that it works and that type and
    # shape are preserved
    # 2018-04-29: moved here from core.tests.test_nanfunctions
    mat = np.matrix(np.eye(3))

    for f in (np.nanargmin, np.nanargmax, np.nansum, np.nanprod,
              np.nanmean, np.nanvar, np.nanstd):
        res = f(mat, axis=0)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (1, 3))
        res = f(mat, axis=1)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 1))
        res = f(mat)
        assert_(np.isscalar(res))

    for f in np.nancumsum, np.nancumprod:
        res = f(mat, axis=0)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 3))
        res = f(mat, axis=1)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 3))
        res = f(mat)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (1, 3*3))
```
Example #8
Source File: test_nanfunctions.py | From: recruit | License: Apache License 2.0 | 5 votes

```python
def test_nanargmax(self):
    tgt = np.argmax(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanargmax(mat), tgt)
```
Example #9
Source File: test_nanfunctions.py | From: lambda-packs | License: MIT License | 5 votes

```python
def test_nanargmax(self):
    tgt = np.argmax(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanargmax(mat), tgt)
```
Example #10
Source File: test_nanfunctions.py | From: auto-alt-text-lambda-api | License: MIT License | 5 votes

```python
def test_nanargmax(self):
    tgt = np.argmax(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanargmax(mat), tgt)
```
Example #11
Source File: test_nanfunctions.py | From: vnpy_crypto | License: MIT License | 5 votes

```python
def test_nanargmax(self):
    tgt = np.argmax(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanargmax(mat), tgt)
```
Example #12
Source File: test_nanfunctions.py | From: Computable | License: MIT License | 5 votes

```python
def test_nanargmax(self):
    tgt = np.argmax(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanargmax(mat), tgt)
```
Example #13
Source File: test_interaction.py | From: Mastering-Elasticsearch-7.0 | License: MIT License | 5 votes

```python
def test_nanfunctions_matrices_general():
    # Check that it works and that type and
    # shape are preserved
    # 2018-04-29: moved here from core.tests.test_nanfunctions
    mat = np.matrix(np.eye(3))

    for f in (np.nanargmin, np.nanargmax, np.nansum, np.nanprod,
              np.nanmean, np.nanvar, np.nanstd):
        res = f(mat, axis=0)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (1, 3))
        res = f(mat, axis=1)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 1))
        res = f(mat)
        assert_(np.isscalar(res))

    for f in np.nancumsum, np.nancumprod:
        res = f(mat, axis=0)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 3))
        res = f(mat, axis=1)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 3))
        res = f(mat)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (1, 3*3))
```
Example #14
Source File: test_nanfunctions.py | From: Mastering-Elasticsearch-7.0 | License: MIT License | 5 votes

```python
def test_nanargmax(self):
    tgt = np.argmax(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanargmax(mat), tgt)
```
Example #15
Source File: test_interaction.py | From: GraphicDesignPatternByPython | License: MIT License | 5 votes

```python
def test_nanfunctions_matrices_general():
    # Check that it works and that type and
    # shape are preserved
    # 2018-04-29: moved here from core.tests.test_nanfunctions
    mat = np.matrix(np.eye(3))

    for f in (np.nanargmin, np.nanargmax, np.nansum, np.nanprod,
              np.nanmean, np.nanvar, np.nanstd):
        res = f(mat, axis=0)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (1, 3))
        res = f(mat, axis=1)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 1))
        res = f(mat)
        assert_(np.isscalar(res))

    for f in np.nancumsum, np.nancumprod:
        res = f(mat, axis=0)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 3))
        res = f(mat, axis=1)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 3))
        res = f(mat)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (1, 3*3))
```
Example #16
Source File: test_nanfunctions.py | From: GraphicDesignPatternByPython | License: MIT License | 5 votes

```python
def test_nanargmax(self):
    tgt = np.argmax(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanargmax(mat), tgt)
```
Example #17
Source File: test_interaction.py | From: predictive-maintenance-using-machine-learning | License: Apache License 2.0 | 5 votes

```python
def test_nanfunctions_matrices_general():
    # Check that it works and that type and
    # shape are preserved
    # 2018-04-29: moved here from core.tests.test_nanfunctions
    mat = np.matrix(np.eye(3))

    for f in (np.nanargmin, np.nanargmax, np.nansum, np.nanprod,
              np.nanmean, np.nanvar, np.nanstd):
        res = f(mat, axis=0)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (1, 3))
        res = f(mat, axis=1)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 1))
        res = f(mat)
        assert_(np.isscalar(res))

    for f in np.nancumsum, np.nancumprod:
        res = f(mat, axis=0)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 3))
        res = f(mat, axis=1)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 3))
        res = f(mat)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (1, 3*3))
```
Example #18
Source File: test_nanfunctions.py | From: predictive-maintenance-using-machine-learning | License: Apache License 2.0 | 5 votes

```python
def test_nanargmax(self):
    tgt = np.argmax(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanargmax(mat), tgt)
```
Example #19
Source File: test_interaction.py | From: pySINDy | License: MIT License | 5 votes

```python
def test_nanfunctions_matrices_general():
    # Check that it works and that type and
    # shape are preserved
    # 2018-04-29: moved here from core.tests.test_nanfunctions
    mat = np.matrix(np.eye(3))

    for f in (np.nanargmin, np.nanargmax, np.nansum, np.nanprod,
              np.nanmean, np.nanvar, np.nanstd):
        res = f(mat, axis=0)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (1, 3))
        res = f(mat, axis=1)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 1))
        res = f(mat)
        assert_(np.isscalar(res))

    for f in np.nancumsum, np.nancumprod:
        res = f(mat, axis=0)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 3))
        res = f(mat, axis=1)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (3, 3))
        res = f(mat)
        assert_(isinstance(res, np.matrix))
        assert_(res.shape == (1, 3*3))
```
Example #20
Source File: test_nanfunctions.py | From: pySINDy | License: MIT License | 5 votes

```python
def test_nanargmax(self):
    tgt = np.argmax(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanargmax(mat), tgt)
```
Example #21
Source File: test_nanfunctions.py | From: mxnet-lambda | License: Apache License 2.0 | 5 votes

```python
def test_nanargmax(self):
    tgt = np.argmax(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanargmax(mat), tgt)
```
Example #22
Source File: periodogram.py | From: lightkurve | License: MIT License | 5 votes

```python
def frequency_at_max_power(self):
    """Returns the frequency corresponding to the highest peak in the periodogram."""
    return self.frequency[np.nanargmax(self.power)]
```
Example #23
Source File: periodogram.py | From: lightkurve | License: MIT License | 5 votes

```python
def transit_time_at_max_power(self):
    """Returns the transit time corresponding to the highest peak in the periodogram."""
    return self.transit_time[np.nanargmax(self.power)]
```
Example #24
Source File: periodogram.py | From: lightkurve | License: MIT License | 5 votes

```python
def duration_at_max_power(self):
    """Returns the duration corresponding to the highest peak in the periodogram."""
    return self.duration[np.nanargmax(self.power)]
```
Example #25
Source File: periodogram.py | From: lightkurve | License: MIT License | 5 votes

```python
def depth_at_max_power(self):
    """Returns the depth corresponding to the highest peak in the periodogram."""
    return self.depth[np.nanargmax(self.power)]
```
Example #26
Source File: test_nanfunctions.py | From: ImageFusion | License: MIT License | 5 votes

```python
def test_nanargmax(self):
    tgt = np.argmax(self.mat)
    for mat in self.integer_arrays():
        assert_equal(np.nanargmax(mat), tgt)
```
Example #27
Source File: test_sdc_numpy.py | From: sdc | License: BSD 2-Clause "Simplified" License | 5 votes

```python
def test_nanargmax(self):
    def ref_impl(a):
        return np.nanargmax(a)

    def sdc_impl(a):
        return numpy_like.nanargmax(a)

    sdc_func = self.jit(sdc_impl)

    cases = [[np.nan, np.nan, np.inf, np.nan],
             [5, 2, -9, 333, -4],
             [3.3, 5.4, np.nan, 7.9]]
    for case in cases:
        a = np.array(case)
        with self.subTest(data=case):
            np.testing.assert_array_equal(sdc_func(a), ref_impl(a))
```
Example #28
Source File: cem_planner.py | From: ReAgent | License: BSD 3-Clause "New" or "Revised" License | 5 votes

```python
def discrete_planning(self, state: rlt.FeatureData) -> Tuple[int, np.ndarray]:
    # For discrete actions, we use random shoots to get the best next action
    random_action_seqs = list(
        itertools.product(range(self.action_dim), repeat=self.plan_horizon_length)
    )
    random_action_seqs = random.choices(random_action_seqs, k=self.cem_pop_size)
    action_solutions = torch.zeros(
        self.cem_pop_size, self.plan_horizon_length, self.action_dim
    )
    for i, action_seq in enumerate(random_action_seqs):
        for j, act_idx in enumerate(action_seq):
            action_solutions[i, j, act_idx] = 1
    acc_rewards = self.acc_rewards_of_all_solutions(state, action_solutions)

    first_action_tally = np.zeros(self.action_dim)
    reward_tally = np.zeros(self.action_dim)
    # pyre-fixme[6]: Expected `Iterable[Variable[_T2]]` for 2nd param but got
    #  `float`.
    for action_seq, acc_reward in zip(random_action_seqs, acc_rewards):
        first_action = action_seq[0]
        first_action_tally[first_action] += 1
        reward_tally[first_action] += acc_reward

    best_next_action_idx = np.nanargmax(reward_tally / first_action_tally)
    best_next_action_one_hot = torch.zeros(self.action_dim).float()
    best_next_action_one_hot[best_next_action_idx] = 1

    logger.debug(
        f"Choose action {best_next_action_idx}."
        f"Stats: {reward_tally} / {first_action_tally}"
        f" = {reward_tally/first_action_tally} "
    )
    return best_next_action_idx, best_next_action_one_hot
```
Example #29
Source File: training.py | From: caml-mimic | License: MIT License | 5 votes

```python
def early_stop(metrics_hist, criterion, patience):
    if not np.all(np.isnan(metrics_hist[criterion])):
        if len(metrics_hist[criterion]) >= patience:
            if criterion == 'loss_dev':
                return np.nanargmin(metrics_hist[criterion]) < len(metrics_hist[criterion]) - patience
            else:
                return np.nanargmax(metrics_hist[criterion]) < len(metrics_hist[criterion]) - patience
    else:
        # keep training if criterion results have all been nan so far
        return False
```
Example #30
Source File: ch_ops.py | From: chumpy | License: MIT License | 5 votes

```python
def argf(self, *args, **kwargs):
    return np.nanargmax(*args, **kwargs)
```
FAQs
What is the maximum dataset size for sklearn?

You can load 22 GB of data into Dask or SFrame, then use it with sklearn.
What datasets does sklearn have?

Toy datasets bundled with scikit-learn include:
- Iris plants dataset
- Diabetes dataset
- Optical recognition of handwritten digits dataset
- Linnerrud dataset
- Wine recognition dataset
The sklearn.datasets package is able to download datasets from the repository using the function sklearn.datasets.fetch_openml.
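As a sketch of that API (the dataset name 'mnist_784' is a common example, not implied by the answer above):

```python
from sklearn.datasets import fetch_openml

# Downloads the named dataset from openml.org and caches it in the
# scikit-learn data home directory (~/scikit_learn_data by default).
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist.data, mnist.target
print(X.shape)  # (70000, 784)
```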
How big should my dataset be?

As a rough rule of thumb, your model should train on at least an order of magnitude more examples than trainable parameters. Simple models on large data sets generally beat fancy models on small data sets.
- An approximate calculation for the size of a dataset is: number of megabytes = M = (N*V*W) / 1024², where N is the number of records, V the number of variables, and W the average width of a value in bytes.
- With N = 20,000, V = 20, and W = 2.9, the size of your dataset is: M = 20000*20*2.9/1024² ≈ 1.11 megabytes.
- Yes, the result is divided by 1,024² even though 1,000² = a million. Computer memory comes in binary increments.
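The same back-of-the-envelope calculation in Python:

```python
# Rough dataset-size estimate: records * variables * bytes-per-value,
# divided by 1024^2 to convert bytes to megabytes.
n_records, n_vars, bytes_per_value = 20000, 20, 2.9
megabytes = n_records * n_vars * bytes_per_value / 1024**2
print(f"{megabytes:.2f} MB")  # 1.11 MB
```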
- Step 1: Choose a class of model.
- Step 2: Choose model hyperparameters.
- Step 3: Arrange the data into features and targets.
- Step 4: Fit the model.
- Step 5: Apply the model to new data (see the sketch below).
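A minimal sketch of those five steps; the iris data and the logistic-regression estimator are illustrative choices, not prescribed by the answer above:

```python
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Step 1: choose a class of model (here, logistic regression).
# Step 2: choose hyperparameters (e.g. the regularization strength C).
model = LogisticRegression(C=1.0, max_iter=200)

# Step 3: arrange the data into a feature matrix X and target vector y.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Step 4: fit the model to the training data.
model.fit(X_train, y_train)

# Step 5: apply the model to unseen data.
print(model.score(X_test, y_test))
```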
Essentially, sklearn is a dummy project on PyPI that will in turn install scikit-learn. Therefore, if you uninstall sklearn you are just uninstalling the dummy package, and not the actual package itself.
How do I see available datasets in sklearn?

Packaged data: these small datasets are packaged with the scikit-learn installation and can be loaded using the sklearn.datasets.load_* functions. Downloadable data: these larger datasets are available for download, and scikit-learn includes tools which streamline this process.
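For instance, the packaged toy datasets load without any network access (a minimal sketch):

```python
from sklearn import datasets

# Packaged data ships with the scikit-learn installation.
iris = datasets.load_iris()
print(iris.data.shape)    # (150, 4)
print(iris.target_names)  # ['setosa' 'versicolor' 'virginica']

# The available loaders can be listed directly from the module.
print([name for name in dir(datasets) if name.startswith('load_')])
```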
Dask is a Python library for parallel computing, which is able to perform computations on large datasets while scaling well-known Python libraries such as pandas, NumPy, and scikit-learn. Dask splits the dataset into a number of partitions. Unlike pandas, each Dask partition is sent to a separate CPU core.
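A minimal sketch of that workflow; the file name is hypothetical:

```python
import dask.dataframe as dd

# Dask reads the CSV lazily and splits it into partitions that can be
# processed in parallel, so the whole file never has to fit in memory.
df = dd.read_csv("large_dataset.csv")  # hypothetical file
print(df.npartitions)                  # number of partitions created

# Operations mirror pandas but are lazy; computation is triggered
# explicitly (len() does this under the hood).
print(len(df))
```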
What is the test size in sklearn?

In scikit-learn, the train_test_split function has the parameter test_size, which represents the proportion of the dataset to include in the test split. It should be between 0 and 1. If None, test_size is set to the complement of the train size.
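For example (a minimal sketch):

```python
import numpy as np
from sklearn.model_selection import train_test_split

X = np.arange(20).reshape(10, 2)
y = np.arange(10)

# test_size=0.3 reserves 30% of the rows for the test split; if left
# as None it defaults to the complement of train_size (0.25 when both
# are None).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
print(X_train.shape, X_test.shape)  # (7, 2) (3, 2)
```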
What is the maximum size of data in a frame?

| Frame Part | Maximum Size Frame |
| --- | --- |
| MAC Type (or Length) | 2 Bytes |
| Payload (Network PDU) | 1500 Bytes |
| Check Sequence (CRC) | 4 Bytes |
| Total Frame Physical Size | 1538 Bytes |
Python provides a huge number of libraries for working with Big Data. Developing code in Python for Big Data is also much faster than in many other programming languages. These two aspects lead developers worldwide to embrace Python as the language of choice for Big Data projects.