ramu0e committed on
Commit
9654b1f
·
verified ·
1 Parent(s): 32f426e

Upload folder using huggingface_hub

Browse files
lam/config.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_depth": 5,
3
+ "action_dropout": 0.0,
4
+ "action_hidden_dim": 96,
5
+ "action_obs_dim": 0,
6
+ "action_prev_dim": 10,
7
+ "action_state_dim": 5,
8
+ "action_target_dim": 10,
9
+ "action_wide_dim": 512,
10
+ "architectures": [
11
+ "LAMModel"
12
+ ],
13
+ "decoder_attention_head_dim": 64,
14
+ "decoder_attn_implementation": "flash_attention_2",
15
+ "decoder_encoder_hidden_dim": 6,
16
+ "decoder_eps": 1e-06,
17
+ "decoder_ffn_dim": 768,
18
+ "decoder_freq_dim": 64,
19
+ "decoder_in_channels": 3,
20
+ "decoder_num_attention_heads": 3,
21
+ "decoder_num_layers": 12,
22
+ "decoder_out_channels": 3,
23
+ "decoder_patch_size": [
24
+ 4,
25
+ 4
26
+ ],
27
+ "decoder_pos_embed_seq_len": null,
28
+ "decoder_rope_max_seq_len": 1024,
29
+ "dtype": "bfloat16",
30
+ "encoder_height": 64,
31
+ "encoder_width": 64,
32
+ "fsq_levels": [
33
+ 8,
34
+ 8,
35
+ 8,
36
+ 5,
37
+ 5,
38
+ 5
39
+ ],
40
+ "initializer_range": 0.02,
41
+ "is_diffusion": true,
42
+ "latent_channels": 6,
43
+ "max_tokens": 256,
44
+ "min_tokens": 1,
45
+ "model_type": "lam",
46
+ "null_latent": 0,
47
+ "transformers_version": "4.57.1",
48
+ "use_tail_drop": true,
49
+ "videomae_config": {
50
+ "attn_drop_rate": 0.0,
51
+ "cos_attn": false,
52
+ "depth": 8,
53
+ "drop_path_rate": 0.0,
54
+ "drop_rate": 0.0,
55
+ "embed_dim": 192,
56
+ "img_size": [
57
+ 64,
58
+ 64
59
+ ],
60
+ "in_chans": 3,
61
+ "init_values": 0.0,
62
+ "layer_norm_eps": 1e-06,
63
+ "mlp_ratio": 4,
64
+ "norm_layer": "nn.LayerNorm",
65
+ "num_classes": 0,
66
+ "num_frames": 2,
67
+ "num_heads": 3,
68
+ "patch_size": 4,
69
+ "qk_scale": null,
70
+ "qkv_bias": true,
71
+ "tubelet_size": 2,
72
+ "use_learnable_pos_emb": false,
73
+ "use_mean_pooling": false,
74
+ "with_cp": false
75
+ },
76
+ "videomae_from_pretrained": null,
77
+ "vocab_size": 64000
78
+ }
lam/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:946f7eeff919289fca9a0a6b37d1bd3787f4268c8b1146fa7f61d1ea137cddc0
3
+ size 24600668
model_index.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "LAMPolicyPipeline",
3
+ "_diffusers_version": "0.35.2",
4
+ "lam": [
5
+ "flexlam_mini.models.lam.modeling_lam",
6
+ "LAMModel"
7
+ ],
8
+ "policy": [
9
+ "flexlam_mini.models.policy.modeling_policy",
10
+ "PolicyQwen3ForConditionalGeneration"
11
+ ],
12
+ "policy_processor": [
13
+ "transformers",
14
+ "Qwen2VLImageProcessor"
15
+ ],
16
+ "processor": [
17
+ "flexlam_mini.models.lam.processing_lam",
18
+ "LAMProcessorFast"
19
+ ],
20
+ "scheduler": [
21
+ "diffusers",
22
+ "FlowMatchEulerDiscreteScheduler"
23
+ ]
24
+ }
policy/config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_latent_dim": 6,
3
+ "action_seq_len": 256,
4
+ "action_start_token_id": 64003,
5
+ "action_vocab_size": 64000,
6
+ "architectures": [
7
+ "PolicyQwen3ForConditionalGeneration"
8
+ ],
9
+ "attention_bias": false,
10
+ "attention_dropout": 0.0,
11
+ "dtype": "bfloat16",
12
+ "eos_token_id": 64004,
13
+ "head_dim": 64,
14
+ "hidden_act": "silu",
15
+ "hidden_size": 192,
16
+ "image_token_id": 64002,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 768,
19
+ "layer_types": [
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention"
28
+ ],
29
+ "max_position_embeddings": 2048,
30
+ "max_window_layers": 28,
31
+ "model_type": "policy_qwen3",
32
+ "num_attention_heads": 3,
33
+ "num_hidden_layers": 8,
34
+ "num_key_value_heads": 3,
35
+ "pad_token_id": 0,
36
+ "predict_tokens": false,
37
+ "rms_norm_eps": 1e-06,
38
+ "rope_scaling": null,
39
+ "rope_theta": 10000.0,
40
+ "sliding_window": null,
41
+ "tie_word_embeddings": false,
42
+ "transformers_version": "4.57.1",
43
+ "use_cache": false,
44
+ "use_sliding_window": false,
45
+ "vision_end_token_id": 64001,
46
+ "vision_in_channels": 3,
47
+ "vision_merge_size": 2,
48
+ "vision_patch_size": 4,
49
+ "vision_start_token_id": 64000,
50
+ "vocab_size": 64005
51
+ }
policy/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": 64004,
4
+ "pad_token_id": 0,
5
+ "transformers_version": "4.57.1",
6
+ "use_cache": false
7
+ }
policy/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17f10d11358eec5b52f26e047e42bb792e02b9e453fdfd9460f0d01f876ad724
3
+ size 58632988
policy_processor/preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": true,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.48145466,
8
+ 0.4578275,
9
+ 0.40821073
10
+ ],
11
+ "image_processor_type": "Qwen2VLImageProcessor",
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "max_pixels": 1003520,
18
+ "merge_size": 2,
19
+ "min_pixels": 3136,
20
+ "patch_size": 4,
21
+ "resample": 3,
22
+ "rescale_factor": 0.00392156862745098,
23
+ "size": {
24
+ "longest_edge": 1003520,
25
+ "shortest_edge": 3136
26
+ },
27
+ "temporal_patch_size": 1
28
+ }
processor/processor_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "encoder_height": 64,
3
+ "encoder_width": 64,
4
+ "height": 64,
5
+ "processor_class": "LAMProcessorFast",
6
+ "width": 64
7
+ }
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "FlowMatchEulerDiscreteScheduler",
3
+ "_diffusers_version": "0.35.2",
4
+ "base_image_seq_len": 256,
5
+ "base_shift": 0.5,
6
+ "invert_sigmas": false,
7
+ "max_image_seq_len": 4096,
8
+ "max_shift": 1.15,
9
+ "num_train_timesteps": 1000,
10
+ "shift": 1.0,
11
+ "shift_terminal": null,
12
+ "stochastic_sampling": false,
13
+ "time_shift_type": "exponential",
14
+ "use_beta_sigmas": false,
15
+ "use_dynamic_shifting": false,
16
+ "use_exponential_sigmas": false,
17
+ "use_karras_sigmas": false
18
+ }