0
我有一組數據,基本上是各個問題的答案。在統計各答案的數量時,我想通過給答案加權,來假裝我的樣本有更理想的分佈。下面是一段簡單的示例代碼(主題:基於變量的權重值):
from pprint import pprint
# Answer options for each question; a respondent stores an index into these.
q1 = [
    'blue',
    'orange',
    'red',
]
q2 = [
    'male',
    'female',
]
q3 = [
    '18-25',
    '26-30',
    '31-40',
    '41+',
]

# One dict per respondent: question name -> index of the chosen answer.
data = [
    {'q1': 1, 'q2': 1, 'q3': 0},  # orange, female, 18-25
    {'q1': 0, 'q2': 1, 'q3': 0},  # blue, female, 18-25
    {'q1': 1, 'q2': 0, 'q3': 0},  # orange, male, 18-25
    {'q1': 2, 'q2': 1, 'q3': 1},  # red, female, 26-30
    {'q1': 2, 'q2': 1, 'q3': 1},  # red, female, 26-30
    {'q1': 1, 'q2': 0, 'q3': 1},  # orange, male, 26-30
]

# Question name -> its answer list, so every question goes through the same
# tallying code instead of three copy-pasted stanzas.
answer_options = {'q1': q1, 'q2': q2, 'q3': q3}

# Per-question tallies: answer label -> accumulated weight.
counts = {
    'q1': {},
    'q2': {},
    'q3': {},
}

# Weight each respondent contributes; 1 yields plain unweighted counts.
respondent_value = 1

for respondent in data:
    for question, options in answer_options.items():
        answer = options[respondent[question]]
        # Every question (including q2) adds the respondent's weight, so
        # changing respondent_value affects all tallies consistently.
        counts[question][answer] = (
            counts[question].get(answer, 0) + respondent_value
        )

pprint(counts)
目前這將打印以下值:
{'q1': {'blue': 1, 'orange': 3, 'red': 2},
'q2': {'female': 4, 'male': 2},
'q3': {'18-25': 3, '26-30': 3}}
不過,我想假裝我的樣本符合以下人口分佈:
- 50%男性
- 50%女性
- 40% 18-25
- 60%26-30
怎樣才能自動生成權重,使我的數據能夠代表上述人口分佈?對於未在上述人口統計中列出的值,我將假設權重爲 1。
如果 pandas/numpy 有幫助,我有興趣使用它們,但我願意使用任何最合適的工具。
如果只按單個變量加權,我可能會這樣做(但我需要按多個變量加權):
from pprint import pprint
# Possible answers per question; a respondent's stored value is an index
# into the matching list.
q1 = ['blue', 'orange', 'red']
q2 = ['male', 'female']
q3 = ['18-25', '26-30', '31-40', '41+']

# Survey responses: each entry maps a question name to the index of the
# answer that respondent picked.
data = [
    {'q1': 1, 'q2': 1, 'q3': 0},  # orange, female, 18-25
    {'q1': 0, 'q2': 1, 'q3': 0},  # blue,   female, 18-25
    {'q1': 1, 'q2': 0, 'q3': 0},  # orange, male,   18-25
    {'q1': 2, 'q2': 1, 'q3': 1},  # red,    female, 26-30
    {'q1': 2, 'q2': 1, 'q3': 1},  # red,    female, 26-30
    {'q1': 1, 'q2': 0, 'q3': 1},  # orange, male,   26-30
]
def get_counts(male_weight, female_weight):
    """Tally the answers to every question, weighting respondents by gender.

    Reads the module-level ``data``, ``q1``, ``q2`` and ``q3``. A respondent
    whose ``q2`` answer is ``'female'`` contributes ``female_weight`` to each
    of their three tallies; every other respondent contributes
    ``male_weight``.

    Returns a dict with keys ``'q1'``, ``'q2'`` and ``'q3'``, each mapping an
    answer label to the summed weight of the respondents who chose it.
    """
    tallies = {'q1': {}, 'q2': {}, 'q3': {}}
    for row in data:
        # Resolve all three answer indices to labels before tallying.
        labels = {
            'q1': q1[row['q1']],
            'q2': q2[row['q2']],
            'q3': q3[row['q3']],
        }
        weight = female_weight if labels['q2'] == 'female' else male_weight
        for question, label in labels.items():
            bucket = tallies[question]
            bucket[label] = bucket.get(label, 0) + weight
    return tallies
# Keep the respondent total as a float so the shares below are computed
# with float division.
total_respondents = float(len(data))

# Baseline: every respondent counts as 1.
counts = get_counts(1, 1)
print("Starting counts")
print("=================")
pprint(counts)
print("\n")

# Target population shares, in percent.
female_pop = 50
male_pop = 50

# Observed sample shares, in percent.
gender_counts = counts['q2']
sample_females = (gender_counts['female'] / total_respondents) * 100
sample_males = (gender_counts['male'] / total_respondents) * 100

# Weight = target share / observed share, so over-represented groups are
# scaled down and under-represented groups are scaled up.
female_weight = female_pop / sample_females
male_weight = male_pop / sample_males

weighted_counts = get_counts(male_weight, female_weight)
print("Weighted Counts")
print("===============")
pprint(weighted_counts)